{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999366420274551, "global_step": 11830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.401252746582031, "epoch": 0.0, "learning_rate": 4.2265426880811495e-08, "loss": 10.0096, "step": 1, "task_loss": 4.892626762390137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.052549362182617, "epoch": 0.0, "learning_rate": 8.453085376162299e-08, "loss": 11.6818, "step": 2, "task_loss": 4.738134860992432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.589045524597168, "epoch": 0.0, "learning_rate": 1.267962806424345e-07, "loss": 10.5285, "step": 3, "task_loss": 4.7521891593933105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.158231735229492, "epoch": 0.0, "learning_rate": 1.6906170752324598e-07, "loss": 10.5274, "step": 4, "task_loss": 4.635807037353516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.926199913024902, "epoch": 0.0, "learning_rate": 2.113271344040575e-07, "loss": 10.9619, "step": 5, "task_loss": 4.720554351806641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.131906509399414, "epoch": 0.01, "learning_rate": 2.53592561284869e-07, "loss": 10.35, "step": 6, "task_loss": 4.6364426612854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.701253890991211, "epoch": 0.01, "learning_rate": 2.958579881656805e-07, "loss": 10.8679, "step": 7, "task_loss": 4.6951212882995605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.522237777709961, "epoch": 0.01, "learning_rate": 3.3812341504649196e-07, "loss": 10.8903, "step": 8, "task_loss": 4.78691291809082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.856358528137207, "epoch": 0.01, "learning_rate": 3.803888419273035e-07, "loss": 9.9278, "step": 9, "task_loss": 4.664228916168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.893524169921875, "epoch": 0.01, "learning_rate": 4.22654268808115e-07, "loss": 11.1883, "step": 10, "task_loss": 4.826891899108887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.096457481384277, "epoch": 0.01, "learning_rate": 4.649196956889265e-07, "loss": 10.3822, "step": 11, "task_loss": 4.564452648162842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.837053298950195, "epoch": 0.01, "learning_rate": 5.07185122569738e-07, "loss": 11.1518, "step": 12, "task_loss": 4.645312309265137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.267672538757324, "epoch": 0.01, "learning_rate": 5.494505494505495e-07, "loss": 10.7131, "step": 13, "task_loss": 4.605127811431885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.251684188842773, "epoch": 0.01, "learning_rate": 5.91715976331361e-07, "loss": 10.9161, "step": 14, "task_loss": 4.689237594604492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.489134788513184, "epoch": 0.01, "learning_rate": 6.339814032121725e-07, "loss": 10.8911, "step": 15, "task_loss": 4.611380577087402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.199850082397461, "epoch": 0.01, "learning_rate": 6.762468300929839e-07, "loss": 10.8487, "step": 16, "task_loss": 4.605398654937744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.422825813293457, "epoch": 0.01, "learning_rate": 7.185122569737954e-07, "loss": 10.6799, "step": 17, "task_loss": 4.498167991638184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.603635787963867, "epoch": 0.02, "learning_rate": 7.60777683854607e-07, "loss": 10.4454, "step": 18, "task_loss": 4.790456771850586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.166597366333008, "epoch": 0.02, "learning_rate": 8.030431107354184e-07, "loss": 10.2467, "step": 19, "task_loss": 4.694336414337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.528684616088867, "epoch": 0.02, "learning_rate": 8.4530853761623e-07, "loss": 10.782, "step": 20, "task_loss": 4.7602434158325195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.959434509277344, "epoch": 0.02, "learning_rate": 8.875739644970415e-07, "loss": 10.9856, "step": 21, "task_loss": 4.760715007781982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.071928024291992, "epoch": 0.02, "learning_rate": 9.29839391377853e-07, "loss": 10.8372, "step": 22, "task_loss": 4.692850112915039 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.15329360961914, "epoch": 0.02, "learning_rate": 9.721048182586645e-07, "loss": 10.956, "step": 23, "task_loss": 4.75758695602417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.30512523651123, "epoch": 0.02, "learning_rate": 1.014370245139476e-06, "loss": 11.4137, "step": 24, "task_loss": 4.612138271331787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.716592788696289, "epoch": 0.02, "learning_rate": 1.0566356720202875e-06, "loss": 10.7737, "step": 25, "task_loss": 4.490636348724365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.638872146606445, "epoch": 0.02, "learning_rate": 1.098901098901099e-06, "loss": 11.0991, "step": 26, "task_loss": 4.579260349273682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.279645919799805, "epoch": 0.02, "learning_rate": 1.1411665257819105e-06, "loss": 10.929, "step": 27, "task_loss": 4.617313385009766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.06277847290039, "epoch": 0.02, "learning_rate": 1.183431952662722e-06, "loss": 11.94, "step": 28, "task_loss": 4.556580543518066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.53848648071289, "epoch": 0.02, "learning_rate": 1.2256973795435333e-06, "loss": 10.6398, "step": 29, "task_loss": 4.736866474151611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.020011901855469, "epoch": 0.03, "learning_rate": 1.267962806424345e-06, "loss": 11.1561, "step": 30, "task_loss": 4.692272663116455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.774686813354492, "epoch": 0.03, "learning_rate": 1.3102282333051563e-06, "loss": 10.7223, "step": 31, "task_loss": 4.611364841461182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.925409317016602, "epoch": 0.03, "learning_rate": 1.3524936601859678e-06, "loss": 10.9749, "step": 32, "task_loss": 4.707573413848877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.56225872039795, "epoch": 0.03, "learning_rate": 1.3947590870667795e-06, "loss": 10.4727, "step": 33, "task_loss": 4.521960735321045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.466785430908203, "epoch": 0.03, "learning_rate": 1.4370245139475908e-06, "loss": 10.0686, "step": 34, "task_loss": 4.677103042602539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.867298126220703, "epoch": 0.03, "learning_rate": 1.4792899408284024e-06, "loss": 10.7959, "step": 35, "task_loss": 4.7338361740112305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 14.69761848449707, "epoch": 0.03, "learning_rate": 1.521555367709214e-06, "loss": 11.7033, "step": 36, "task_loss": 4.766733169555664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.110300064086914, "epoch": 0.03, "learning_rate": 1.5638207945900256e-06, "loss": 10.2657, "step": 37, "task_loss": 4.644844055175781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.515869140625, "epoch": 0.03, "learning_rate": 1.6060862214708369e-06, "loss": 11.6068, "step": 38, "task_loss": 4.657038688659668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.178262710571289, "epoch": 0.03, "learning_rate": 1.6483516483516484e-06, "loss": 11.5534, "step": 39, "task_loss": 4.716360569000244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.793022155761719, "epoch": 0.03, "learning_rate": 1.69061707523246e-06, "loss": 10.3024, "step": 40, "task_loss": 4.584822654724121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.980836868286133, "epoch": 0.03, "learning_rate": 1.7328825021132714e-06, "loss": 10.2114, "step": 41, "task_loss": 4.645711898803711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.877447128295898, "epoch": 0.04, "learning_rate": 1.775147928994083e-06, "loss": 10.991, "step": 42, "task_loss": 4.712393760681152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.909172058105469, "epoch": 0.04, "learning_rate": 1.8174133558748946e-06, "loss": 10.9789, "step": 43, "task_loss": 4.523096561431885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.710283279418945, "epoch": 0.04, "learning_rate": 1.859678782755706e-06, "loss": 10.1556, "step": 44, "task_loss": 4.549439907073975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.420883178710938, "epoch": 0.04, "learning_rate": 1.9019442096365174e-06, "loss": 11.1205, "step": 45, "task_loss": 4.64084529876709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.881129264831543, "epoch": 0.04, "learning_rate": 1.944209636517329e-06, "loss": 11.3197, "step": 46, "task_loss": 4.605323314666748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.959627151489258, "epoch": 0.04, "learning_rate": 1.9864750633981404e-06, "loss": 10.0134, "step": 47, "task_loss": 4.76186990737915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.785381317138672, "epoch": 0.04, "learning_rate": 2.028740490278952e-06, "loss": 11.2724, "step": 48, "task_loss": 4.572047233581543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.291681289672852, "epoch": 0.04, "learning_rate": 2.0710059171597635e-06, "loss": 9.8133, "step": 49, "task_loss": 4.580976963043213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.108386039733887, "epoch": 0.04, "learning_rate": 2.113271344040575e-06, "loss": 10.9727, "step": 50, "task_loss": 4.514222145080566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.9840087890625, "epoch": 0.04, "learning_rate": 2.1555367709213865e-06, "loss": 11.1356, "step": 51, "task_loss": 4.773179054260254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.960487365722656, "epoch": 0.04, "learning_rate": 2.197802197802198e-06, "loss": 10.6345, "step": 52, "task_loss": 4.771177291870117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.318519592285156, "epoch": 0.04, "learning_rate": 2.2400676246830095e-06, "loss": 10.9351, "step": 53, "task_loss": 4.643435478210449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.702406883239746, "epoch": 0.05, "learning_rate": 2.282333051563821e-06, "loss": 10.8739, "step": 54, "task_loss": 4.767299175262451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.626771926879883, "epoch": 0.05, "learning_rate": 2.324598478444632e-06, "loss": 10.4667, "step": 55, "task_loss": 4.659745216369629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.181329727172852, "epoch": 0.05, "learning_rate": 2.366863905325444e-06, "loss": 11.1587, "step": 56, "task_loss": 4.50927734375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.474700927734375, "epoch": 0.05, "learning_rate": 2.4091293322062555e-06, "loss": 10.7587, "step": 57, "task_loss": 4.735442638397217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.157629013061523, "epoch": 0.05, "learning_rate": 2.4513947590870666e-06, "loss": 10.5098, "step": 58, "task_loss": 4.7120280265808105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.597808837890625, "epoch": 0.05, "learning_rate": 2.4936601859678785e-06, "loss": 10.9239, "step": 59, "task_loss": 4.544466495513916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.338491439819336, "epoch": 0.05, "learning_rate": 2.53592561284869e-06, "loss": 11.2097, "step": 60, "task_loss": 4.595733165740967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.5025634765625, "epoch": 0.05, "learning_rate": 2.578191039729501e-06, "loss": 10.1109, "step": 61, "task_loss": 4.481218338012695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.911174774169922, "epoch": 0.05, "learning_rate": 2.6204564666103126e-06, "loss": 10.4209, "step": 62, "task_loss": 4.755753993988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.287789344787598, "epoch": 0.05, "learning_rate": 2.6627218934911246e-06, "loss": 10.6676, "step": 63, "task_loss": 4.576233863830566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.504301071166992, "epoch": 0.05, "learning_rate": 2.7049873203719357e-06, "loss": 10.5224, "step": 64, "task_loss": 4.584009170532227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.620535850524902, "epoch": 0.05, "learning_rate": 2.747252747252747e-06, "loss": 11.4484, "step": 65, "task_loss": 4.66487979888916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.62692642211914, "epoch": 0.06, "learning_rate": 2.789518174133559e-06, "loss": 11.0338, "step": 66, "task_loss": 4.6078877449035645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.585807800292969, "epoch": 0.06, "learning_rate": 2.83178360101437e-06, "loss": 10.76, "step": 67, "task_loss": 4.583164215087891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.091007232666016, "epoch": 0.06, "learning_rate": 2.8740490278951817e-06, "loss": 10.6261, "step": 68, "task_loss": 4.843503952026367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.112598419189453, "epoch": 0.06, "learning_rate": 2.9163144547759936e-06, "loss": 11.1768, "step": 69, "task_loss": 4.736169815063477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.72795581817627, "epoch": 0.06, "learning_rate": 2.9585798816568047e-06, "loss": 10.6404, "step": 70, "task_loss": 4.642921447753906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.319413185119629, "epoch": 0.06, "learning_rate": 3.0008453085376162e-06, "loss": 10.0057, "step": 71, "task_loss": 4.7453932762146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.833139419555664, "epoch": 0.06, "learning_rate": 3.043110735418428e-06, "loss": 10.241, "step": 72, "task_loss": 4.611067295074463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.642141342163086, "epoch": 0.06, "learning_rate": 3.0853761622992392e-06, "loss": 11.5514, "step": 73, "task_loss": 4.551996231079102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.007657051086426, "epoch": 0.06, "learning_rate": 3.127641589180051e-06, "loss": 10.2966, "step": 74, "task_loss": 4.746895790100098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.952818870544434, "epoch": 0.06, "learning_rate": 3.1699070160608622e-06, "loss": 10.6516, "step": 75, "task_loss": 4.483114719390869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.456079483032227, "epoch": 0.06, "learning_rate": 3.2121724429416738e-06, "loss": 10.714, "step": 76, "task_loss": 4.63083553314209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.359310150146484, "epoch": 0.07, "learning_rate": 3.2544378698224853e-06, "loss": 11.1745, "step": 77, "task_loss": 4.611959934234619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.901968955993652, "epoch": 0.07, "learning_rate": 3.2967032967032968e-06, "loss": 11.0845, "step": 78, "task_loss": 4.656195640563965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.351903915405273, "epoch": 0.07, "learning_rate": 3.3389687235841087e-06, "loss": 10.2119, "step": 79, "task_loss": 4.559196472167969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.532894134521484, "epoch": 0.07, "learning_rate": 3.38123415046492e-06, "loss": 11.6145, "step": 80, "task_loss": 4.518980503082275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.81486988067627, "epoch": 0.07, "learning_rate": 3.4234995773457313e-06, "loss": 10.3252, "step": 81, "task_loss": 4.745222091674805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.91808795928955, "epoch": 0.07, "learning_rate": 3.465765004226543e-06, "loss": 11.4492, "step": 82, "task_loss": 4.599747657775879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.75035285949707, "epoch": 0.07, "learning_rate": 3.5080304311073543e-06, "loss": 12.0154, "step": 83, "task_loss": 4.534034729003906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.416339874267578, "epoch": 0.07, "learning_rate": 3.550295857988166e-06, "loss": 11.7797, "step": 84, "task_loss": 4.6087446212768555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.104852676391602, "epoch": 0.07, "learning_rate": 3.5925612848689777e-06, "loss": 11.2088, "step": 85, "task_loss": 4.582298755645752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.853858947753906, "epoch": 0.07, "learning_rate": 3.6348267117497893e-06, "loss": 10.4641, "step": 86, "task_loss": 4.735517501831055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.215934753417969, "epoch": 0.07, "learning_rate": 3.6770921386306e-06, "loss": 10.4625, "step": 87, "task_loss": 4.575207233428955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.155092239379883, "epoch": 0.07, "learning_rate": 3.719357565511412e-06, "loss": 10.6338, "step": 88, "task_loss": 4.618195056915283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.937198638916016, "epoch": 0.08, "learning_rate": 3.7616229923922234e-06, "loss": 11.688, "step": 89, "task_loss": 4.624277591705322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.145498275756836, "epoch": 0.08, "learning_rate": 3.803888419273035e-06, "loss": 10.4532, "step": 90, "task_loss": 4.740138530731201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.845165252685547, "epoch": 0.08, "learning_rate": 3.846153846153847e-06, "loss": 10.7544, "step": 91, "task_loss": 4.642944812774658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.508260726928711, "epoch": 0.08, "learning_rate": 3.888419273034658e-06, "loss": 10.247, "step": 92, "task_loss": 4.689677715301514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.763487815856934, "epoch": 0.08, "learning_rate": 3.930684699915469e-06, "loss": 10.4696, "step": 93, "task_loss": 4.511449813842773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.989532470703125, "epoch": 0.08, "learning_rate": 3.972950126796281e-06, "loss": 11.4147, "step": 94, "task_loss": 4.630776405334473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.195806503295898, "epoch": 0.08, "learning_rate": 4.015215553677092e-06, "loss": 10.5455, "step": 95, "task_loss": 4.391584396362305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.314935684204102, "epoch": 0.08, "learning_rate": 4.057480980557904e-06, "loss": 10.7707, "step": 96, "task_loss": 4.4360032081604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.252174377441406, "epoch": 0.08, "learning_rate": 4.099746407438716e-06, "loss": 10.3222, "step": 97, "task_loss": 4.609511852264404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.970467567443848, "epoch": 0.08, "learning_rate": 4.142011834319527e-06, "loss": 10.765, "step": 98, "task_loss": 4.600650787353516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.502471923828125, "epoch": 0.08, "learning_rate": 4.184277261200338e-06, "loss": 11.2731, "step": 99, "task_loss": 4.553318977355957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.837301254272461, "epoch": 0.08, "learning_rate": 4.22654268808115e-06, "loss": 10.4816, "step": 100, "task_loss": 4.524878025054932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.877593040466309, "epoch": 0.09, "learning_rate": 4.268808114961961e-06, "loss": 10.378, "step": 101, "task_loss": 4.581943988800049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.747071266174316, "epoch": 0.09, "learning_rate": 4.311073541842773e-06, "loss": 10.1761, "step": 102, "task_loss": 4.500858306884766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.369998931884766, "epoch": 0.09, "learning_rate": 4.353338968723585e-06, "loss": 10.1041, "step": 103, "task_loss": 4.541271686553955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.75676155090332, "epoch": 0.09, "learning_rate": 4.395604395604396e-06, "loss": 10.3307, "step": 104, "task_loss": 4.611202716827393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.97954273223877, "epoch": 0.09, "learning_rate": 4.437869822485207e-06, "loss": 10.6151, "step": 105, "task_loss": 4.574236869812012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.60630989074707, "epoch": 0.09, "learning_rate": 4.480135249366019e-06, "loss": 11.1391, "step": 106, "task_loss": 4.621090412139893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.409262657165527, "epoch": 0.09, "learning_rate": 4.52240067624683e-06, "loss": 10.8054, "step": 107, "task_loss": 4.612370491027832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.864517211914062, "epoch": 0.09, "learning_rate": 4.564666103127642e-06, "loss": 10.9043, "step": 108, "task_loss": 4.572620868682861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.147832870483398, "epoch": 0.09, "learning_rate": 4.606931530008454e-06, "loss": 10.2476, "step": 109, "task_loss": 4.456898212432861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.922043800354004, "epoch": 0.09, "learning_rate": 4.649196956889264e-06, "loss": 11.026, "step": 110, "task_loss": 4.477705478668213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.049093246459961, "epoch": 0.09, "learning_rate": 4.691462383770076e-06, "loss": 9.8742, "step": 111, "task_loss": 4.421534538269043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.542190551757812, "epoch": 0.09, "learning_rate": 4.733727810650888e-06, "loss": 10.1631, "step": 112, "task_loss": 4.670546531677246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.381237030029297, "epoch": 0.1, "learning_rate": 4.775993237531699e-06, "loss": 10.9929, "step": 113, "task_loss": 4.543846130371094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.57253646850586, "epoch": 0.1, "learning_rate": 4.818258664412511e-06, "loss": 11.2002, "step": 114, "task_loss": 4.487604141235352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.053580284118652, "epoch": 0.1, "learning_rate": 4.860524091293322e-06, "loss": 10.0694, "step": 115, "task_loss": 4.460312366485596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.066115379333496, "epoch": 0.1, "learning_rate": 4.902789518174133e-06, "loss": 10.4626, "step": 116, "task_loss": 4.541171550750732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.238849639892578, "epoch": 0.1, "learning_rate": 4.945054945054945e-06, "loss": 10.6713, "step": 117, "task_loss": 4.496248722076416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.923192977905273, "epoch": 0.1, "learning_rate": 4.987320371935757e-06, "loss": 10.9967, "step": 118, "task_loss": 4.532353401184082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.618034362792969, "epoch": 0.1, "learning_rate": 5.029585798816568e-06, "loss": 11.3353, "step": 119, "task_loss": 4.392609596252441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.99794864654541, "epoch": 0.1, "learning_rate": 5.07185122569738e-06, "loss": 10.5346, "step": 120, "task_loss": 4.493141174316406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.20986557006836, "epoch": 0.1, "learning_rate": 5.114116652578191e-06, "loss": 10.4818, "step": 121, "task_loss": 4.539980888366699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.840202331542969, "epoch": 0.1, "learning_rate": 5.156382079459002e-06, "loss": 10.3844, "step": 122, "task_loss": 4.449349403381348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.205333709716797, "epoch": 0.1, "learning_rate": 5.198647506339814e-06, "loss": 10.6117, "step": 123, "task_loss": 4.405229568481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.239276885986328, "epoch": 0.1, "learning_rate": 5.240912933220625e-06, "loss": 10.2592, "step": 124, "task_loss": 4.615588188171387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.001930236816406, "epoch": 0.11, "learning_rate": 5.283178360101437e-06, "loss": 10.352, "step": 125, "task_loss": 4.410690784454346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.85190200805664, "epoch": 0.11, "learning_rate": 5.325443786982249e-06, "loss": 10.5959, "step": 126, "task_loss": 4.361550331115723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.532184600830078, "epoch": 0.11, "learning_rate": 5.36770921386306e-06, "loss": 10.8546, "step": 127, "task_loss": 4.560980796813965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.168941497802734, "epoch": 0.11, "learning_rate": 5.409974640743871e-06, "loss": 9.945, "step": 128, "task_loss": 4.319516658782959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.664264678955078, "epoch": 0.11, "learning_rate": 5.452240067624683e-06, "loss": 10.0059, "step": 129, "task_loss": 4.529840469360352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.364313125610352, "epoch": 0.11, "learning_rate": 5.494505494505494e-06, "loss": 10.6434, "step": 130, "task_loss": 4.567710876464844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.61684799194336, "epoch": 0.11, "learning_rate": 5.536770921386306e-06, "loss": 10.9652, "step": 131, "task_loss": 4.543725967407227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.296420097351074, "epoch": 0.11, "learning_rate": 5.579036348267118e-06, "loss": 10.8019, "step": 132, "task_loss": 4.392816543579102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.244513511657715, "epoch": 0.11, "learning_rate": 5.621301775147929e-06, "loss": 10.3166, "step": 133, "task_loss": 4.524864196777344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.795333862304688, "epoch": 0.11, "learning_rate": 5.66356720202874e-06, "loss": 9.9061, "step": 134, "task_loss": 4.428186416625977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.680121421813965, "epoch": 0.11, "learning_rate": 5.705832628909552e-06, "loss": 11.0451, "step": 135, "task_loss": 4.4648847579956055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.783164978027344, "epoch": 0.11, "learning_rate": 5.748098055790363e-06, "loss": 10.5381, "step": 136, "task_loss": 4.5345869064331055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.702608108520508, "epoch": 0.12, "learning_rate": 5.790363482671175e-06, "loss": 10.5336, "step": 137, "task_loss": 4.530561923980713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.673863410949707, "epoch": 0.12, "learning_rate": 5.832628909551987e-06, "loss": 10.0525, "step": 138, "task_loss": 4.549605846405029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.31637191772461, "epoch": 0.12, "learning_rate": 5.874894336432798e-06, "loss": 11.046, "step": 139, "task_loss": 4.417450428009033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.60335922241211, "epoch": 0.12, "learning_rate": 5.917159763313609e-06, "loss": 11.0033, "step": 140, "task_loss": 4.456324577331543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.492410659790039, "epoch": 0.12, "learning_rate": 5.959425190194421e-06, "loss": 9.6602, "step": 141, "task_loss": 4.540316104888916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.16151237487793, "epoch": 0.12, "learning_rate": 6.0016906170752324e-06, "loss": 10.0926, "step": 142, "task_loss": 4.438863754272461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.215349197387695, "epoch": 0.12, "learning_rate": 6.043956043956044e-06, "loss": 11.0029, "step": 143, "task_loss": 4.328216075897217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.26711654663086, "epoch": 0.12, "learning_rate": 6.086221470836856e-06, "loss": 10.9748, "step": 144, "task_loss": 4.4802165031433105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.34218978881836, "epoch": 0.12, "learning_rate": 6.128486897717667e-06, "loss": 10.8035, "step": 145, "task_loss": 4.443931579589844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.075801849365234, "epoch": 0.12, "learning_rate": 6.1707523245984785e-06, "loss": 10.653, "step": 146, "task_loss": 4.379349231719971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.428898811340332, "epoch": 0.12, "learning_rate": 6.21301775147929e-06, "loss": 10.4691, "step": 147, "task_loss": 4.337327480316162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.50244140625, "epoch": 0.13, "learning_rate": 6.255283178360102e-06, "loss": 10.2231, "step": 148, "task_loss": 4.468355178833008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.08976936340332, "epoch": 0.13, "learning_rate": 6.297548605240913e-06, "loss": 10.1672, "step": 149, "task_loss": 4.255422115325928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.594917297363281, "epoch": 0.13, "learning_rate": 6.3398140321217245e-06, "loss": 10.8461, "step": 150, "task_loss": 4.49447774887085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.48318099975586, "epoch": 0.13, "learning_rate": 6.382079459002536e-06, "loss": 9.5377, "step": 151, "task_loss": 4.561052322387695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.20145034790039, "epoch": 0.13, "learning_rate": 6.4243448858833475e-06, "loss": 10.5525, "step": 152, "task_loss": 4.390120983123779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.688833236694336, "epoch": 0.13, "learning_rate": 6.4666103127641594e-06, "loss": 10.2754, "step": 153, "task_loss": 4.304681777954102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.814868927001953, "epoch": 0.13, "learning_rate": 6.5088757396449705e-06, "loss": 10.0, "step": 154, "task_loss": 4.3498992919921875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.885833740234375, "epoch": 0.13, "learning_rate": 6.551141166525782e-06, "loss": 10.0414, "step": 155, "task_loss": 4.327273845672607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.6326904296875, "epoch": 0.13, "learning_rate": 6.5934065934065935e-06, "loss": 10.1731, "step": 156, "task_loss": 4.250783443450928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.790252685546875, "epoch": 0.13, "learning_rate": 6.635672020287405e-06, "loss": 9.8521, "step": 157, "task_loss": 4.512824058532715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.195863723754883, "epoch": 0.13, "learning_rate": 6.677937447168217e-06, "loss": 10.465, "step": 158, "task_loss": 4.231414794921875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.076541900634766, "epoch": 0.13, "learning_rate": 6.720202874049028e-06, "loss": 10.5894, "step": 159, "task_loss": 4.295960903167725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.61097526550293, "epoch": 0.14, "learning_rate": 6.76246830092984e-06, "loss": 10.3264, "step": 160, "task_loss": 4.200234413146973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.298629760742188, "epoch": 0.14, "learning_rate": 6.8047337278106515e-06, "loss": 10.3089, "step": 161, "task_loss": 4.260715484619141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.261417388916016, "epoch": 0.14, "learning_rate": 6.846999154691463e-06, "loss": 10.9172, "step": 162, "task_loss": 4.159542083740234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.925491333007812, "epoch": 0.14, "learning_rate": 6.8892645815722745e-06, "loss": 10.8552, "step": 163, "task_loss": 4.678323745727539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.751452445983887, "epoch": 0.14, "learning_rate": 6.931530008453086e-06, "loss": 9.9297, "step": 164, "task_loss": 4.311061859130859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.707817077636719, "epoch": 0.14, "learning_rate": 6.9737954353338975e-06, "loss": 10.4312, "step": 165, "task_loss": 4.241760730743408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.049520492553711, "epoch": 0.14, "learning_rate": 7.016060862214709e-06, "loss": 10.6936, "step": 166, "task_loss": 4.228716850280762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.306411743164062, "epoch": 0.14, "learning_rate": 7.05832628909552e-06, "loss": 10.3048, "step": 167, "task_loss": 4.350638389587402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.849008560180664, "epoch": 0.14, "learning_rate": 7.100591715976332e-06, "loss": 9.5438, "step": 168, "task_loss": 4.0851240158081055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.718179702758789, "epoch": 0.14, "learning_rate": 7.142857142857143e-06, "loss": 11.0949, "step": 169, "task_loss": 4.1237568855285645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.567625999450684, "epoch": 0.14, "learning_rate": 7.1851225697379555e-06, "loss": 11.1414, "step": 170, "task_loss": 4.12692403793335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.928383827209473, "epoch": 0.14, "learning_rate": 7.227387996618766e-06, "loss": 10.2438, "step": 171, "task_loss": 4.174402236938477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.179824829101562, "epoch": 0.15, "learning_rate": 7.2696534234995785e-06, "loss": 11.0575, "step": 172, "task_loss": 4.029665946960449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.795778274536133, "epoch": 0.15, "learning_rate": 7.31191885038039e-06, "loss": 10.3031, "step": 173, "task_loss": 4.112843990325928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.9187211990356445, "epoch": 0.15, "learning_rate": 7.3541842772612e-06, "loss": 9.3362, "step": 174, "task_loss": 4.208075046539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.734969139099121, "epoch": 0.15, "learning_rate": 7.396449704142013e-06, "loss": 10.727, "step": 175, "task_loss": 4.331492900848389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.250337600708008, "epoch": 0.15, "learning_rate": 7.438715131022824e-06, "loss": 9.9562, "step": 176, "task_loss": 4.499085903167725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.62624740600586, "epoch": 0.15, "learning_rate": 7.480980557903636e-06, "loss": 9.162, "step": 177, "task_loss": 4.3066325187683105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.904439926147461, "epoch": 0.15, "learning_rate": 7.523245984784447e-06, "loss": 9.9839, "step": 178, "task_loss": 4.188817501068115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.373298645019531, "epoch": 0.15, "learning_rate": 7.565511411665258e-06, "loss": 10.1923, "step": 179, "task_loss": 4.187190532684326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.411502838134766, "epoch": 0.15, "learning_rate": 7.60777683854607e-06, "loss": 9.2368, "step": 180, "task_loss": 4.162166118621826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.27265739440918, "epoch": 0.15, "learning_rate": 7.65004226542688e-06, "loss": 9.8704, "step": 181, "task_loss": 4.186618328094482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.351598739624023, "epoch": 0.15, "learning_rate": 7.692307692307694e-06, "loss": 10.1579, "step": 182, "task_loss": 4.1490559577941895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.26500415802002, "epoch": 0.15, "learning_rate": 7.734573119188505e-06, "loss": 9.8424, "step": 183, "task_loss": 4.214407920837402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.788366317749023, "epoch": 0.16, "learning_rate": 7.776838546069316e-06, "loss": 9.6744, "step": 184, "task_loss": 4.2683563232421875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.168024063110352, "epoch": 0.16, "learning_rate": 7.819103972950127e-06, "loss": 9.9332, "step": 185, "task_loss": 4.358354568481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.690860748291016, "epoch": 0.16, "learning_rate": 7.861369399830938e-06, "loss": 9.4314, "step": 186, "task_loss": 3.832334518432617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.102380752563477, "epoch": 0.16, "learning_rate": 7.90363482671175e-06, "loss": 10.4519, "step": 187, "task_loss": 4.0241312980651855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.26218318939209, "epoch": 0.16, "learning_rate": 7.945900253592562e-06, "loss": 10.5253, "step": 188, "task_loss": 4.109080791473389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.979167938232422, "epoch": 0.16, "learning_rate": 7.988165680473373e-06, "loss": 9.9005, "step": 189, "task_loss": 4.276987552642822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.066683769226074, "epoch": 0.16, "learning_rate": 8.030431107354184e-06, "loss": 10.2259, "step": 190, "task_loss": 3.8080692291259766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.64475154876709, "epoch": 0.16, "learning_rate": 8.072696534234995e-06, "loss": 11.1235, "step": 191, "task_loss": 4.027547836303711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.322698593139648, "epoch": 0.16, "learning_rate": 8.114961961115808e-06, "loss": 10.4304, "step": 192, "task_loss": 4.164212226867676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.968287467956543, "epoch": 0.16, "learning_rate": 8.157227387996619e-06, "loss": 9.7028, "step": 193, "task_loss": 4.011444568634033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.517049789428711, "epoch": 0.16, "learning_rate": 8.199492814877432e-06, "loss": 9.5686, "step": 194, "task_loss": 4.35103178024292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.970458984375, "epoch": 0.16, "learning_rate": 8.241758241758243e-06, "loss": 10.1229, "step": 195, "task_loss": 3.974368095397949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.064119338989258, "epoch": 0.17, "learning_rate": 8.284023668639054e-06, "loss": 9.8515, "step": 196, "task_loss": 4.241404056549072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.40321159362793, "epoch": 0.17, "learning_rate": 8.326289095519865e-06, "loss": 9.7902, "step": 197, "task_loss": 3.999314785003662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.829171180725098, "epoch": 0.17, "learning_rate": 8.368554522400676e-06, "loss": 9.6029, "step": 198, "task_loss": 4.079038143157959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.299787521362305, "epoch": 0.17, "learning_rate": 8.410819949281489e-06, "loss": 9.6608, "step": 199, "task_loss": 4.106539726257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.038948059082031, "epoch": 0.17, "learning_rate": 8.4530853761623e-06, "loss": 9.6341, "step": 200, "task_loss": 4.156850814819336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.216949462890625, "epoch": 0.17, "learning_rate": 8.495350803043111e-06, "loss": 10.5543, "step": 201, "task_loss": 4.123752117156982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.41486930847168, "epoch": 0.17, "learning_rate": 8.537616229923922e-06, "loss": 9.6986, "step": 202, "task_loss": 3.9575307369232178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.376791000366211, "epoch": 0.17, "learning_rate": 8.579881656804733e-06, "loss": 9.6901, "step": 203, "task_loss": 3.9421780109405518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.882196426391602, "epoch": 0.17, "learning_rate": 8.622147083685546e-06, "loss": 10.0823, "step": 204, "task_loss": 3.8839290142059326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.709710121154785, "epoch": 0.17, "learning_rate": 8.664412510566357e-06, "loss": 9.5854, "step": 205, "task_loss": 3.9729766845703125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.835489273071289, "epoch": 0.17, "learning_rate": 8.70667793744717e-06, "loss": 9.1047, "step": 206, "task_loss": 3.6019794940948486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.569930076599121, "epoch": 0.17, "learning_rate": 8.74894336432798e-06, "loss": 9.2932, "step": 207, "task_loss": 4.285494804382324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.408281326293945, "epoch": 0.18, "learning_rate": 8.791208791208792e-06, "loss": 8.9305, "step": 208, "task_loss": 4.25324821472168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.855817794799805, "epoch": 0.18, "learning_rate": 8.833474218089603e-06, "loss": 9.708, "step": 209, "task_loss": 4.019259929656982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.003971099853516, "epoch": 0.18, "learning_rate": 8.875739644970414e-06, "loss": 9.717, "step": 210, "task_loss": 3.8217432498931885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.502333641052246, "epoch": 0.18, "learning_rate": 8.918005071851227e-06, "loss": 9.0949, "step": 211, "task_loss": 3.8114209175109863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.079965591430664, "epoch": 0.18, "learning_rate": 8.960270498732038e-06, "loss": 9.928, "step": 212, "task_loss": 3.931993007659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.937761306762695, "epoch": 0.18, "learning_rate": 9.002535925612849e-06, "loss": 9.3971, "step": 213, "task_loss": 3.545567750930786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.179677963256836, "epoch": 0.18, "learning_rate": 9.04480135249366e-06, "loss": 8.7895, "step": 214, "task_loss": 3.9564297199249268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.430915832519531, "epoch": 0.18, "learning_rate": 9.087066779374471e-06, "loss": 9.4255, "step": 215, "task_loss": 3.6936464309692383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.708186626434326, "epoch": 0.18, "learning_rate": 9.129332206255284e-06, "loss": 9.8396, "step": 216, "task_loss": 3.6646270751953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.04698371887207, "epoch": 0.18, "learning_rate": 9.171597633136095e-06, "loss": 8.6434, "step": 217, "task_loss": 4.043421745300293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.984081268310547, "epoch": 0.18, "learning_rate": 9.213863060016908e-06, "loss": 8.9999, "step": 218, "task_loss": 3.9021544456481934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.195612907409668, "epoch": 0.19, "learning_rate": 9.256128486897717e-06, "loss": 8.4716, "step": 219, "task_loss": 3.597910165786743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.274510383605957, "epoch": 0.19, "learning_rate": 9.298393913778528e-06, "loss": 8.9149, "step": 220, "task_loss": 3.711958169937134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.383131980895996, "epoch": 0.19, "learning_rate": 9.340659340659341e-06, "loss": 9.407, "step": 221, "task_loss": 3.8420701026916504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.546163558959961, "epoch": 0.19, "learning_rate": 9.382924767540152e-06, "loss": 8.6391, "step": 222, "task_loss": 3.2911906242370605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.781237602233887, "epoch": 0.19, "learning_rate": 9.425190194420965e-06, "loss": 8.5782, "step": 223, "task_loss": 3.6772801876068115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.79945182800293, "epoch": 0.19, "learning_rate": 9.467455621301776e-06, "loss": 9.0533, "step": 224, "task_loss": 3.8824925422668457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.585966110229492, "epoch": 0.19, "learning_rate": 9.509721048182587e-06, "loss": 8.6693, "step": 225, "task_loss": 3.2115890979766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.868124008178711, "epoch": 0.19, "learning_rate": 9.551986475063398e-06, "loss": 9.9145, "step": 226, "task_loss": 3.5866811275482178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.454011917114258, "epoch": 0.19, "learning_rate": 9.59425190194421e-06, "loss": 9.602, "step": 227, "task_loss": 3.671903371810913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.080892562866211, "epoch": 0.19, "learning_rate": 9.636517328825022e-06, "loss": 9.236, "step": 228, "task_loss": 3.8781256675720215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.694344520568848, "epoch": 0.19, "learning_rate": 9.678782755705833e-06, "loss": 8.6795, "step": 229, "task_loss": 3.7127487659454346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.361852645874023, "epoch": 0.19, "learning_rate": 9.721048182586644e-06, "loss": 9.1772, "step": 230, "task_loss": 4.048767566680908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.946479797363281, "epoch": 0.2, "learning_rate": 9.763313609467455e-06, "loss": 9.6015, "step": 231, "task_loss": 3.758242607116699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.552227973937988, "epoch": 0.2, "learning_rate": 9.805579036348266e-06, "loss": 9.0544, "step": 232, "task_loss": 3.474621057510376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.391003608703613, "epoch": 0.2, "learning_rate": 9.84784446322908e-06, "loss": 9.1934, "step": 233, "task_loss": 3.8352060317993164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.020505905151367, "epoch": 0.2, "learning_rate": 9.89010989010989e-06, "loss": 9.0082, "step": 234, "task_loss": 3.3080086708068848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.987804412841797, "epoch": 0.2, "learning_rate": 9.932375316990703e-06, "loss": 9.0133, "step": 235, "task_loss": 3.8554043769836426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.857782363891602, "epoch": 0.2, "learning_rate": 9.974640743871514e-06, "loss": 8.6475, "step": 236, "task_loss": 3.5057122707366943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.407407760620117, "epoch": 0.2, "learning_rate": 1.0016906170752325e-05, "loss": 8.9769, "step": 237, "task_loss": 3.6319797039031982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.700774192810059, "epoch": 0.2, "learning_rate": 1.0059171597633136e-05, "loss": 8.6491, "step": 238, "task_loss": 3.5044655799865723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.22755241394043, "epoch": 0.2, "learning_rate": 1.0101437024513947e-05, "loss": 9.062, "step": 239, "task_loss": 3.7174177169799805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.068516731262207, "epoch": 0.2, "learning_rate": 1.014370245139476e-05, "loss": 8.7242, "step": 240, "task_loss": 3.6628873348236084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.076862335205078, "epoch": 0.2, "learning_rate": 1.0185967878275571e-05, "loss": 9.0803, "step": 241, "task_loss": 3.7128098011016846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.254521369934082, "epoch": 0.2, "learning_rate": 1.0228233305156382e-05, "loss": 8.9578, "step": 242, "task_loss": 3.5981173515319824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.438701629638672, "epoch": 0.21, "learning_rate": 1.0270498732037193e-05, "loss": 8.6914, "step": 243, "task_loss": 3.3172607421875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.060410499572754, "epoch": 0.21, "learning_rate": 1.0312764158918005e-05, "loss": 8.9001, "step": 244, "task_loss": 3.4519731998443604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.345046997070312, "epoch": 0.21, "learning_rate": 1.0355029585798817e-05, "loss": 8.3225, "step": 245, "task_loss": 3.19291615486145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.830698013305664, "epoch": 0.21, "learning_rate": 1.0397295012679628e-05, "loss": 8.6938, "step": 246, "task_loss": 3.2747275829315186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.47376823425293, "epoch": 0.21, "learning_rate": 1.0439560439560441e-05, "loss": 9.0141, "step": 247, "task_loss": 3.191412925720215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.231502532958984, "epoch": 0.21, "learning_rate": 1.048182586644125e-05, "loss": 8.6917, "step": 248, "task_loss": 3.282334566116333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.188379287719727, "epoch": 0.21, "learning_rate": 1.0524091293322063e-05, "loss": 9.5349, "step": 249, "task_loss": 3.4164657592773438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.567378044128418, "epoch": 0.21, "learning_rate": 1.0566356720202874e-05, "loss": 8.8724, "step": 250, "task_loss": 3.5913124084472656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.760666847229004, "epoch": 0.21, "learning_rate": 1.0608622147083686e-05, "loss": 8.1636, "step": 251, "task_loss": 3.483957529067993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.500585556030273, "epoch": 0.21, "learning_rate": 1.0650887573964498e-05, "loss": 9.0621, "step": 252, "task_loss": 3.738168954849243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.115425109863281, "epoch": 0.21, "learning_rate": 1.069315300084531e-05, "loss": 7.3075, "step": 253, "task_loss": 3.3705291748046875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.635997772216797, "epoch": 0.21, "learning_rate": 1.073541842772612e-05, "loss": 8.6073, "step": 254, "task_loss": 3.6014835834503174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.210226058959961, "epoch": 0.22, "learning_rate": 1.0777683854606932e-05, "loss": 8.2774, "step": 255, "task_loss": 2.881493330001831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.415550708770752, "epoch": 0.22, "learning_rate": 1.0819949281487743e-05, "loss": 8.5258, "step": 256, "task_loss": 3.4588966369628906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.154767990112305, "epoch": 0.22, "learning_rate": 1.0862214708368555e-05, "loss": 8.0084, "step": 257, "task_loss": 3.1955325603485107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.13353157043457, "epoch": 0.22, "learning_rate": 1.0904480135249366e-05, "loss": 7.9645, "step": 258, "task_loss": 3.771306037902832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.701910972595215, "epoch": 0.22, "learning_rate": 1.094674556213018e-05, "loss": 7.6286, "step": 259, "task_loss": 3.565950632095337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.414033889770508, "epoch": 0.22, "learning_rate": 1.0989010989010989e-05, "loss": 8.6498, "step": 260, "task_loss": 2.9891951084136963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.290165901184082, "epoch": 0.22, "learning_rate": 1.1031276415891801e-05, "loss": 8.6685, "step": 261, "task_loss": 2.9994091987609863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.77068042755127, "epoch": 0.22, "learning_rate": 1.1073541842772613e-05, "loss": 8.8589, "step": 262, "task_loss": 3.2685546875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.688724517822266, "epoch": 0.22, "learning_rate": 1.1115807269653424e-05, "loss": 7.9953, "step": 263, "task_loss": 3.321538209915161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.403505325317383, "epoch": 0.22, "learning_rate": 1.1158072696534236e-05, "loss": 8.7281, "step": 264, "task_loss": 3.2189598083496094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.617870330810547, "epoch": 0.22, "learning_rate": 1.1200338123415047e-05, "loss": 7.9859, "step": 265, "task_loss": 3.474712371826172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.767102241516113, "epoch": 0.22, "learning_rate": 1.1242603550295859e-05, "loss": 7.7027, "step": 266, "task_loss": 2.6980035305023193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.136045455932617, "epoch": 0.23, "learning_rate": 1.128486897717667e-05, "loss": 7.9763, "step": 267, "task_loss": 3.768853187561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.04012393951416, "epoch": 0.23, "learning_rate": 1.132713440405748e-05, "loss": 8.2781, "step": 268, "task_loss": 3.1157946586608887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.61848258972168, "epoch": 0.23, "learning_rate": 1.1369399830938294e-05, "loss": 7.4994, "step": 269, "task_loss": 3.1562840938568115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.576787948608398, "epoch": 0.23, "learning_rate": 1.1411665257819105e-05, "loss": 8.4031, "step": 270, "task_loss": 2.711094379425049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.111095428466797, "epoch": 0.23, "learning_rate": 1.1453930684699916e-05, "loss": 7.9395, "step": 271, "task_loss": 3.4933362007141113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.787321090698242, "epoch": 0.23, "learning_rate": 1.1496196111580727e-05, "loss": 7.902, "step": 272, "task_loss": 3.258427143096924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.489380836486816, "epoch": 0.23, "learning_rate": 1.153846153846154e-05, "loss": 8.4259, "step": 273, "task_loss": 2.996643304824829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.90135383605957, "epoch": 0.23, "learning_rate": 1.158072696534235e-05, "loss": 8.3521, "step": 274, "task_loss": 2.73516583442688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.699431419372559, "epoch": 0.23, "learning_rate": 1.1622992392223162e-05, "loss": 8.8066, "step": 275, "task_loss": 3.1251683235168457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.420254707336426, "epoch": 0.23, "learning_rate": 1.1665257819103974e-05, "loss": 7.9228, "step": 276, "task_loss": 2.9531593322753906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.027641296386719, "epoch": 0.23, "learning_rate": 1.1707523245984786e-05, "loss": 8.1953, "step": 277, "task_loss": 3.0873312950134277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.769328594207764, "epoch": 0.23, "learning_rate": 1.1749788672865597e-05, "loss": 7.749, "step": 278, "task_loss": 3.2071521282196045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.19460678100586, "epoch": 0.24, "learning_rate": 1.1792054099746408e-05, "loss": 8.2392, "step": 279, "task_loss": 3.1791350841522217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.13593578338623, "epoch": 0.24, "learning_rate": 1.1834319526627219e-05, "loss": 8.2283, "step": 280, "task_loss": 3.136172294616699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.444770812988281, "epoch": 0.24, "learning_rate": 1.1876584953508032e-05, "loss": 7.6916, "step": 281, "task_loss": 3.1838512420654297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.633211135864258, "epoch": 0.24, "learning_rate": 1.1918850380388843e-05, "loss": 8.3672, "step": 282, "task_loss": 3.301696300506592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.746334075927734, "epoch": 0.24, "learning_rate": 1.1961115807269654e-05, "loss": 8.035, "step": 283, "task_loss": 2.9519131183624268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.243766784667969, "epoch": 0.24, "learning_rate": 1.2003381234150465e-05, "loss": 7.3558, "step": 284, "task_loss": 2.5326671600341797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.28909969329834, "epoch": 0.24, "learning_rate": 1.2045646661031278e-05, "loss": 7.225, "step": 285, "task_loss": 3.277024030685425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.461730003356934, "epoch": 0.24, "learning_rate": 1.2087912087912089e-05, "loss": 7.474, "step": 286, "task_loss": 2.977522850036621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.790618896484375, "epoch": 0.24, "learning_rate": 1.21301775147929e-05, "loss": 7.5562, "step": 287, "task_loss": 2.8264684677124023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.595634937286377, "epoch": 0.24, "learning_rate": 1.2172442941673713e-05, "loss": 7.6003, "step": 288, "task_loss": 3.3120086193084717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.217419624328613, "epoch": 0.24, "learning_rate": 1.2214708368554522e-05, "loss": 6.4637, "step": 289, "task_loss": 3.0589048862457275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.162705421447754, "epoch": 0.24, "learning_rate": 1.2256973795435335e-05, "loss": 6.8342, "step": 290, "task_loss": 2.5783019065856934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.19760274887085, "epoch": 0.25, "learning_rate": 1.2299239222316146e-05, "loss": 7.2736, "step": 291, "task_loss": 3.362192392349243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.472311973571777, "epoch": 0.25, "learning_rate": 1.2341504649196957e-05, "loss": 7.1116, "step": 292, "task_loss": 2.647622585296631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.400154113769531, "epoch": 0.25, "learning_rate": 1.238377007607777e-05, "loss": 6.7419, "step": 293, "task_loss": 2.456449508666992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.02690315246582, "epoch": 0.25, "learning_rate": 1.242603550295858e-05, "loss": 7.3142, "step": 294, "task_loss": 2.8212831020355225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.869359016418457, "epoch": 0.25, "learning_rate": 1.2468300929839392e-05, "loss": 7.3636, "step": 295, "task_loss": 2.766111373901367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.493875503540039, "epoch": 0.25, "learning_rate": 1.2510566356720205e-05, "loss": 7.2889, "step": 296, "task_loss": 3.290860652923584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.03181791305542, "epoch": 0.25, "learning_rate": 1.2552831783601016e-05, "loss": 6.9105, "step": 297, "task_loss": 2.9176244735717773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.181756973266602, "epoch": 0.25, "learning_rate": 1.2595097210481827e-05, "loss": 6.9361, "step": 298, "task_loss": 2.4063384532928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.473308563232422, "epoch": 0.25, "learning_rate": 1.2637362637362638e-05, "loss": 6.9408, "step": 299, "task_loss": 2.5788469314575195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.2282915115356445, "epoch": 0.25, "learning_rate": 1.2679628064243449e-05, "loss": 7.3187, "step": 300, "task_loss": 3.1390576362609863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.785738945007324, "epoch": 0.25, "learning_rate": 1.2721893491124262e-05, "loss": 7.1577, "step": 301, "task_loss": 2.2855823040008545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.428567409515381, "epoch": 0.26, "learning_rate": 1.2764158918005073e-05, "loss": 7.0928, "step": 302, "task_loss": 2.671579360961914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.212998867034912, "epoch": 0.26, "learning_rate": 1.2806424344885884e-05, "loss": 6.6411, "step": 303, "task_loss": 2.615726947784424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.74449348449707, "epoch": 0.26, "learning_rate": 1.2848689771766695e-05, "loss": 6.9111, "step": 304, "task_loss": 2.9002161026000977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.850585460662842, "epoch": 0.26, "learning_rate": 1.2890955198647506e-05, "loss": 7.3291, "step": 305, "task_loss": 3.1867191791534424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.805402755737305, "epoch": 0.26, "learning_rate": 1.2933220625528319e-05, "loss": 7.1928, "step": 306, "task_loss": 2.387925386428833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.910515785217285, "epoch": 0.26, "learning_rate": 1.297548605240913e-05, "loss": 7.0842, "step": 307, "task_loss": 2.3775668144226074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.981715202331543, "epoch": 0.26, "learning_rate": 1.3017751479289941e-05, "loss": 6.5439, "step": 308, "task_loss": 2.5921473503112793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.610891342163086, "epoch": 0.26, "learning_rate": 1.3060016906170752e-05, "loss": 7.1423, "step": 309, "task_loss": 2.8785226345062256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.0554304122924805, "epoch": 0.26, "learning_rate": 1.3102282333051563e-05, "loss": 5.9923, "step": 310, "task_loss": 2.7669990062713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.873757362365723, "epoch": 0.26, "learning_rate": 1.3144547759932378e-05, "loss": 6.7758, "step": 311, "task_loss": 2.6831376552581787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.9176788330078125, "epoch": 0.26, "learning_rate": 1.3186813186813187e-05, "loss": 6.8608, "step": 312, "task_loss": 2.2950429916381836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.42899227142334, "epoch": 0.26, "learning_rate": 1.3229078613693998e-05, "loss": 6.8969, "step": 313, "task_loss": 2.610841989517212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.422521591186523, "epoch": 0.27, "learning_rate": 1.327134404057481e-05, "loss": 6.8519, "step": 314, "task_loss": 3.0274319648742676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.260191917419434, "epoch": 0.27, "learning_rate": 1.3313609467455624e-05, "loss": 6.184, "step": 315, "task_loss": 2.229701042175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.206312656402588, "epoch": 0.27, "learning_rate": 1.3355874894336435e-05, "loss": 6.7417, "step": 316, "task_loss": 2.2478044033050537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.560079574584961, "epoch": 0.27, "learning_rate": 1.3398140321217246e-05, "loss": 6.8284, "step": 317, "task_loss": 2.7621591091156006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.798011779785156, "epoch": 0.27, "learning_rate": 1.3440405748098055e-05, "loss": 6.0798, "step": 318, "task_loss": 2.4615724086761475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.894994735717773, "epoch": 0.27, "learning_rate": 1.3482671174978866e-05, "loss": 6.9614, "step": 319, "task_loss": 3.0196123123168945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.6640214920043945, "epoch": 0.27, "learning_rate": 1.352493660185968e-05, "loss": 7.2014, "step": 320, "task_loss": 2.7083771228790283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.498839855194092, "epoch": 0.27, "learning_rate": 1.3567202028740492e-05, "loss": 6.0043, "step": 321, "task_loss": 2.57639217376709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.266739845275879, "epoch": 0.27, "learning_rate": 1.3609467455621303e-05, "loss": 6.4169, "step": 322, "task_loss": 2.02738094329834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.437615871429443, "epoch": 0.27, "learning_rate": 1.3651732882502114e-05, "loss": 6.9068, "step": 323, "task_loss": 3.103834390640259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.877680778503418, "epoch": 0.27, "learning_rate": 1.3693998309382925e-05, "loss": 5.8824, "step": 324, "task_loss": 2.407956123352051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.760437488555908, "epoch": 0.27, "learning_rate": 1.3736263736263738e-05, "loss": 6.5646, "step": 325, "task_loss": 1.9662142992019653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.232259750366211, "epoch": 0.28, "learning_rate": 1.3778529163144549e-05, "loss": 5.837, "step": 326, "task_loss": 2.3079373836517334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.061127662658691, "epoch": 0.28, "learning_rate": 1.382079459002536e-05, "loss": 6.5843, "step": 327, "task_loss": 2.5315494537353516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.084709167480469, "epoch": 0.28, "learning_rate": 1.3863060016906171e-05, "loss": 6.3076, "step": 328, "task_loss": 2.286262035369873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.249837398529053, "epoch": 0.28, "learning_rate": 1.3905325443786982e-05, "loss": 6.3723, "step": 329, "task_loss": 2.6068496704101562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.857322692871094, "epoch": 0.28, "learning_rate": 1.3947590870667795e-05, "loss": 6.1248, "step": 330, "task_loss": 2.0540034770965576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.113246440887451, "epoch": 0.28, "learning_rate": 1.3989856297548606e-05, "loss": 6.0194, "step": 331, "task_loss": 2.010962724685669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.814301490783691, "epoch": 0.28, "learning_rate": 1.4032121724429417e-05, "loss": 6.1691, "step": 332, "task_loss": 2.532341241836548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.61727237701416, "epoch": 0.28, "learning_rate": 1.4074387151310228e-05, "loss": 6.3174, "step": 333, "task_loss": 1.9083880186080933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.70169734954834, "epoch": 0.28, "learning_rate": 1.411665257819104e-05, "loss": 6.0141, "step": 334, "task_loss": 2.515512228012085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.251251220703125, "epoch": 0.28, "learning_rate": 1.4158918005071852e-05, "loss": 6.9427, "step": 335, "task_loss": 2.480609893798828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.369148254394531, "epoch": 0.28, "learning_rate": 1.4201183431952663e-05, "loss": 5.8693, "step": 336, "task_loss": 2.1720032691955566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.2281670570373535, "epoch": 0.28, "learning_rate": 1.4243448858833474e-05, "loss": 5.3506, "step": 337, "task_loss": 2.1687817573547363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.734415054321289, "epoch": 0.29, "learning_rate": 1.4285714285714285e-05, "loss": 6.0431, "step": 338, "task_loss": 2.306811809539795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.350484848022461, "epoch": 0.29, "learning_rate": 1.4327979712595097e-05, "loss": 5.876, "step": 339, "task_loss": 2.484330892562866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.591800689697266, "epoch": 0.29, "learning_rate": 1.4370245139475911e-05, "loss": 5.5564, "step": 340, "task_loss": 2.388108253479004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.097963809967041, "epoch": 0.29, "learning_rate": 1.441251056635672e-05, "loss": 5.7937, "step": 341, "task_loss": 2.235738515853882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.015453338623047, "epoch": 0.29, "learning_rate": 1.4454775993237531e-05, "loss": 6.0484, "step": 342, "task_loss": 2.593729019165039 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.147584915161133, "epoch": 0.29, "learning_rate": 1.4497041420118343e-05, "loss": 6.0263, "step": 343, "task_loss": 2.5251975059509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.920589447021484, "epoch": 0.29, "learning_rate": 1.4539306846999157e-05, "loss": 5.9855, "step": 344, "task_loss": 2.1887781620025635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.158243179321289, "epoch": 0.29, "learning_rate": 1.4581572273879968e-05, "loss": 5.5422, "step": 345, "task_loss": 2.1593053340911865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.341128349304199, "epoch": 0.29, "learning_rate": 1.462383770076078e-05, "loss": 5.4015, "step": 346, "task_loss": 2.297511339187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.213985919952393, "epoch": 0.29, "learning_rate": 1.466610312764159e-05, "loss": 5.6427, "step": 347, "task_loss": 1.9561781883239746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.228743553161621, "epoch": 0.29, "learning_rate": 1.47083685545224e-05, "loss": 5.6217, "step": 348, "task_loss": 2.4128050804138184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.457026958465576, "epoch": 0.29, "learning_rate": 1.4750633981403214e-05, "loss": 5.3361, "step": 349, "task_loss": 1.6784095764160156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.081937313079834, "epoch": 0.3, "learning_rate": 1.4792899408284025e-05, "loss": 4.7882, "step": 350, "task_loss": 2.1411118507385254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.480690956115723, "epoch": 0.3, "learning_rate": 1.4835164835164836e-05, "loss": 5.5894, "step": 351, "task_loss": 1.6895763874053955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.447168827056885, "epoch": 0.3, "learning_rate": 1.4877430262045647e-05, "loss": 5.8964, "step": 352, "task_loss": 2.352617025375366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.502082347869873, "epoch": 0.3, "learning_rate": 1.4919695688926458e-05, "loss": 5.2495, "step": 353, "task_loss": 1.7761390209197998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.259467601776123, "epoch": 0.3, "learning_rate": 1.4961961115807271e-05, "loss": 6.3188, "step": 354, "task_loss": 1.9013293981552124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.437971591949463, "epoch": 0.3, "learning_rate": 1.5004226542688082e-05, "loss": 5.4945, "step": 355, "task_loss": 2.480257749557495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.047064781188965, "epoch": 0.3, "learning_rate": 1.5046491969568893e-05, "loss": 4.9297, "step": 356, "task_loss": 2.2339107990264893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.814544677734375, "epoch": 0.3, "learning_rate": 1.5088757396449705e-05, "loss": 5.2395, "step": 357, "task_loss": 1.9862208366394043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.48899507522583, "epoch": 0.3, "learning_rate": 1.5131022823330516e-05, "loss": 5.1673, "step": 358, "task_loss": 2.346494674682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.068769931793213, "epoch": 0.3, "learning_rate": 1.5173288250211328e-05, "loss": 6.3192, "step": 359, "task_loss": 1.8270263671875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.905778884887695, "epoch": 0.3, "learning_rate": 1.521555367709214e-05, "loss": 5.3275, "step": 360, "task_loss": 2.515556573867798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.568187713623047, "epoch": 0.3, "learning_rate": 1.525781910397295e-05, "loss": 4.9508, "step": 361, "task_loss": 1.9543206691741943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.14290714263916, "epoch": 0.31, "learning_rate": 1.530008453085376e-05, "loss": 5.0504, "step": 362, "task_loss": 1.5537413358688354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.294061660766602, "epoch": 0.31, "learning_rate": 1.534234995773457e-05, "loss": 5.7817, "step": 363, "task_loss": 2.2763454914093018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.711641311645508, "epoch": 0.31, "learning_rate": 1.5384615384615387e-05, "loss": 4.918, "step": 364, "task_loss": 2.295132875442505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.71959114074707, "epoch": 0.31, "learning_rate": 1.5426880811496197e-05, "loss": 5.4176, "step": 365, "task_loss": 1.730970025062561 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.809643745422363, "epoch": 0.31, "learning_rate": 1.546914623837701e-05, "loss": 5.4989, "step": 366, "task_loss": 1.8908711671829224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.354519844055176, "epoch": 0.31, "learning_rate": 1.551141166525782e-05, "loss": 4.9994, "step": 367, "task_loss": 2.252140998840332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.818769454956055, "epoch": 0.31, "learning_rate": 1.555367709213863e-05, "loss": 4.6224, "step": 368, "task_loss": 2.2808339595794678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.135170936584473, "epoch": 0.31, "learning_rate": 1.5595942519019444e-05, "loss": 4.6033, "step": 369, "task_loss": 1.8799511194229126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.529808044433594, "epoch": 0.31, "learning_rate": 1.5638207945900254e-05, "loss": 5.7547, "step": 370, "task_loss": 2.4224965572357178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.006011962890625, "epoch": 0.31, "learning_rate": 1.5680473372781066e-05, "loss": 5.2004, "step": 371, "task_loss": 1.773535966873169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.425222873687744, "epoch": 0.31, "learning_rate": 1.5722738799661876e-05, "loss": 5.37, "step": 372, "task_loss": 1.4093199968338013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.11223030090332, "epoch": 0.32, "learning_rate": 1.576500422654269e-05, "loss": 5.1158, "step": 373, "task_loss": 1.751749038696289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.101421356201172, "epoch": 0.32, "learning_rate": 1.58072696534235e-05, "loss": 5.2174, "step": 374, "task_loss": 1.7568731307983398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.80379581451416, "epoch": 0.32, "learning_rate": 1.584953508030431e-05, "loss": 5.1301, "step": 375, "task_loss": 1.5947718620300293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.3648295402526855, "epoch": 0.32, "learning_rate": 1.5891800507185124e-05, "loss": 4.4972, "step": 376, "task_loss": 1.5682613849639893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.106077194213867, "epoch": 0.32, "learning_rate": 1.5934065934065933e-05, "loss": 5.275, "step": 377, "task_loss": 1.6369590759277344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.22844123840332, "epoch": 0.32, "learning_rate": 1.5976331360946746e-05, "loss": 4.7913, "step": 378, "task_loss": 1.9700238704681396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.59877872467041, "epoch": 0.32, "learning_rate": 1.601859678782756e-05, "loss": 5.132, "step": 379, "task_loss": 1.3100091218948364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.6694016456604, "epoch": 0.32, "learning_rate": 1.6060862214708368e-05, "loss": 4.6817, "step": 380, "task_loss": 1.7603349685668945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.3761115074157715, "epoch": 0.32, "learning_rate": 1.610312764158918e-05, "loss": 4.8867, "step": 381, "task_loss": 1.822664499282837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.007589817047119, "epoch": 0.32, "learning_rate": 1.614539306846999e-05, "loss": 4.7381, "step": 382, "task_loss": 1.7499802112579346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.820176124572754, "epoch": 0.32, "learning_rate": 1.6187658495350806e-05, "loss": 5.0633, "step": 383, "task_loss": 2.327514410018921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.7460198402404785, "epoch": 0.32, "learning_rate": 1.6229923922231616e-05, "loss": 4.6413, "step": 384, "task_loss": 1.1549615859985352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.943947792053223, "epoch": 0.33, "learning_rate": 1.6272189349112425e-05, "loss": 4.9301, "step": 385, "task_loss": 1.9001785516738892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.163317680358887, "epoch": 0.33, "learning_rate": 1.6314454775993238e-05, "loss": 4.6902, "step": 386, "task_loss": 1.2983224391937256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.010702133178711, "epoch": 0.33, "learning_rate": 1.6356720202874047e-05, "loss": 4.5975, "step": 387, "task_loss": 1.796522617340088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.987996816635132, "epoch": 0.33, "learning_rate": 1.6398985629754863e-05, "loss": 4.0046, "step": 388, "task_loss": 1.6701565980911255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.82192325592041, "epoch": 0.33, "learning_rate": 1.6441251056635673e-05, "loss": 4.851, "step": 389, "task_loss": 2.5367350578308105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.722593307495117, "epoch": 0.33, "learning_rate": 1.6483516483516486e-05, "loss": 4.6742, "step": 390, "task_loss": 1.783361792564392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.53436279296875, "epoch": 0.33, "learning_rate": 1.6525781910397295e-05, "loss": 4.5712, "step": 391, "task_loss": 1.957253336906433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.564609527587891, "epoch": 0.33, "learning_rate": 1.6568047337278108e-05, "loss": 4.698, "step": 392, "task_loss": 1.426672339439392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.326480865478516, "epoch": 0.33, "learning_rate": 1.661031276415892e-05, "loss": 4.2897, "step": 393, "task_loss": 1.9501603841781616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.8940839767456055, "epoch": 0.33, "learning_rate": 1.665257819103973e-05, "loss": 4.0876, "step": 394, "task_loss": 2.0132434368133545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.622222900390625, "epoch": 0.33, "learning_rate": 1.6694843617920543e-05, "loss": 5.0483, "step": 395, "task_loss": 1.8250575065612793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.641453266143799, "epoch": 0.33, "learning_rate": 1.6737109044801352e-05, "loss": 4.2104, "step": 396, "task_loss": 1.7931866645812988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.02548885345459, "epoch": 0.34, "learning_rate": 1.6779374471682165e-05, "loss": 4.7028, "step": 397, "task_loss": 1.9356880187988281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.496095657348633, "epoch": 0.34, "learning_rate": 1.6821639898562978e-05, "loss": 4.2976, "step": 398, "task_loss": 1.7331576347351074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.399902820587158, "epoch": 0.34, "learning_rate": 1.6863905325443787e-05, "loss": 4.7302, "step": 399, "task_loss": 1.3496167659759521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.8272504806518555, "epoch": 0.34, "learning_rate": 1.69061707523246e-05, "loss": 4.5282, "step": 400, "task_loss": 1.84552001953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.289150714874268, "epoch": 0.34, "learning_rate": 1.694843617920541e-05, "loss": 3.9873, "step": 401, "task_loss": 2.0759339332580566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.205371856689453, "epoch": 0.34, "learning_rate": 1.6990701606086222e-05, "loss": 4.5158, "step": 402, "task_loss": 2.198180675506592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.112056732177734, "epoch": 0.34, "learning_rate": 1.7032967032967035e-05, "loss": 3.9158, "step": 403, "task_loss": 1.09367835521698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.292093276977539, "epoch": 0.34, "learning_rate": 1.7075232459847844e-05, "loss": 4.0103, "step": 404, "task_loss": 1.782770037651062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.360280990600586, "epoch": 0.34, "learning_rate": 1.7117497886728657e-05, "loss": 4.46, "step": 405, "task_loss": 1.675170660018921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4457850456237793, "epoch": 0.34, "learning_rate": 1.7159763313609466e-05, "loss": 3.8621, "step": 406, "task_loss": 0.9419337511062622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.452712297439575, "epoch": 0.34, "learning_rate": 1.7202028740490282e-05, "loss": 3.7746, "step": 407, "task_loss": 1.1267768144607544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.673864841461182, "epoch": 0.34, "learning_rate": 1.7244294167371092e-05, "loss": 3.9696, "step": 408, "task_loss": 1.6245949268341064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.173816680908203, "epoch": 0.35, "learning_rate": 1.72865595942519e-05, "loss": 4.1783, "step": 409, "task_loss": 1.3438111543655396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.341399192810059, "epoch": 0.35, "learning_rate": 1.7328825021132714e-05, "loss": 4.4695, "step": 410, "task_loss": 1.1473441123962402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.8628387451171875, "epoch": 0.35, "learning_rate": 1.7371090448013523e-05, "loss": 4.3961, "step": 411, "task_loss": 1.7702137231826782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.303501605987549, "epoch": 0.35, "learning_rate": 1.741335587489434e-05, "loss": 4.259, "step": 412, "task_loss": 0.5525205731391907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.952951908111572, "epoch": 0.35, "learning_rate": 1.745562130177515e-05, "loss": 3.7518, "step": 413, "task_loss": 1.5829828977584839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.334525108337402, "epoch": 0.35, "learning_rate": 1.749788672865596e-05, "loss": 3.9616, "step": 414, "task_loss": 1.680464744567871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.903733253479004, "epoch": 0.35, "learning_rate": 1.754015215553677e-05, "loss": 3.9871, "step": 415, "task_loss": 1.5312334299087524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.885559558868408, "epoch": 0.35, "learning_rate": 1.7582417582417584e-05, "loss": 3.784, "step": 416, "task_loss": 1.4334973096847534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.6053595542907715, "epoch": 0.35, "learning_rate": 1.7624683009298397e-05, "loss": 4.4421, "step": 417, "task_loss": 1.5037039518356323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.156390190124512, "epoch": 0.35, "learning_rate": 1.7666948436179206e-05, "loss": 3.7749, "step": 418, "task_loss": 1.6037442684173584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.8080124855041504, "epoch": 0.35, "learning_rate": 1.770921386306002e-05, "loss": 3.6636, "step": 419, "task_loss": 1.6637969017028809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.479292869567871, "epoch": 0.35, "learning_rate": 1.7751479289940828e-05, "loss": 3.7907, "step": 420, "task_loss": 2.234433650970459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.493191719055176, "epoch": 0.36, "learning_rate": 1.779374471682164e-05, "loss": 3.5741, "step": 421, "task_loss": 1.3695054054260254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.8968281745910645, "epoch": 0.36, "learning_rate": 1.7836010143702454e-05, "loss": 3.9396, "step": 422, "task_loss": 1.0983349084854126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.242815017700195, "epoch": 0.36, "learning_rate": 1.7878275570583263e-05, "loss": 3.3392, "step": 423, "task_loss": 1.559214472770691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.583710193634033, "epoch": 0.36, "learning_rate": 1.7920540997464076e-05, "loss": 4.3783, "step": 424, "task_loss": 1.8920035362243652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.42551326751709, "epoch": 0.36, "learning_rate": 1.7962806424344885e-05, "loss": 3.8254, "step": 425, "task_loss": 1.1572916507720947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.6783761978149414, "epoch": 0.36, "learning_rate": 1.8005071851225698e-05, "loss": 4.1274, "step": 426, "task_loss": 2.5123939514160156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.2860021591186523, "epoch": 0.36, "learning_rate": 1.804733727810651e-05, "loss": 3.5506, "step": 427, "task_loss": 2.2135393619537354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.019425868988037, "epoch": 0.36, "learning_rate": 1.808960270498732e-05, "loss": 3.6586, "step": 428, "task_loss": 2.0903937816619873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9563803672790527, "epoch": 0.36, "learning_rate": 1.8131868131868133e-05, "loss": 3.7366, "step": 429, "task_loss": 1.2654510736465454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.5858025550842285, "epoch": 0.36, "learning_rate": 1.8174133558748942e-05, "loss": 3.8427, "step": 430, "task_loss": 2.394788980484009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.7217352390289307, "epoch": 0.36, "learning_rate": 1.8216398985629755e-05, "loss": 3.6939, "step": 431, "task_loss": 2.012280225753784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.464168071746826, "epoch": 0.36, "learning_rate": 1.8258664412510568e-05, "loss": 4.2274, "step": 432, "task_loss": 1.4811339378356934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.682771682739258, "epoch": 0.37, "learning_rate": 1.8300929839391377e-05, "loss": 3.4052, "step": 433, "task_loss": 1.2687524557113647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.156030654907227, "epoch": 0.37, "learning_rate": 1.834319526627219e-05, "loss": 3.439, "step": 434, "task_loss": 1.5681263208389282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0534415245056152, "epoch": 0.37, "learning_rate": 1.8385460693153e-05, "loss": 3.4619, "step": 435, "task_loss": 0.6403687000274658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.792714834213257, "epoch": 0.37, "learning_rate": 1.8427726120033816e-05, "loss": 3.2645, "step": 436, "task_loss": 1.4865721464157104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.508829116821289, "epoch": 0.37, "learning_rate": 1.8469991546914625e-05, "loss": 3.449, "step": 437, "task_loss": 2.0342023372650146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.771442174911499, "epoch": 0.37, "learning_rate": 1.8512256973795435e-05, "loss": 3.2096, "step": 438, "task_loss": 1.6448383331298828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.7576513290405273, "epoch": 0.37, "learning_rate": 1.8554522400676247e-05, "loss": 3.3896, "step": 439, "task_loss": 1.989166021347046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.2696585655212402, "epoch": 0.37, "learning_rate": 1.8596787827557057e-05, "loss": 3.166, "step": 440, "task_loss": 1.621731162071228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.015273571014404, "epoch": 0.37, "learning_rate": 1.8639053254437873e-05, "loss": 3.0957, "step": 441, "task_loss": 1.249782681465149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8374276161193848, "epoch": 0.37, "learning_rate": 1.8681318681318682e-05, "loss": 3.3129, "step": 442, "task_loss": 1.5639160871505737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9181265830993652, "epoch": 0.37, "learning_rate": 1.8723584108199495e-05, "loss": 3.6712, "step": 443, "task_loss": 1.9833242893218994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.014204025268555, "epoch": 0.38, "learning_rate": 1.8765849535080304e-05, "loss": 3.3885, "step": 444, "task_loss": 1.6166936159133911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.649585247039795, "epoch": 0.38, "learning_rate": 1.8808114961961117e-05, "loss": 3.4575, "step": 445, "task_loss": 1.1268773078918457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.3683552742004395, "epoch": 0.38, "learning_rate": 1.885038038884193e-05, "loss": 3.1624, "step": 446, "task_loss": 1.213356375694275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.517117500305176, "epoch": 0.38, "learning_rate": 1.889264581572274e-05, "loss": 3.1741, "step": 447, "task_loss": 1.4275141954421997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0502755641937256, "epoch": 0.38, "learning_rate": 1.8934911242603552e-05, "loss": 3.0345, "step": 448, "task_loss": 1.2092204093933105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9489402770996094, "epoch": 0.38, "learning_rate": 1.897717666948436e-05, "loss": 2.5975, "step": 449, "task_loss": 1.8202134370803833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.8612520694732666, "epoch": 0.38, "learning_rate": 1.9019442096365174e-05, "loss": 3.4335, "step": 450, "task_loss": 1.433902621269226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9671883583068848, "epoch": 0.38, "learning_rate": 1.9061707523245987e-05, "loss": 3.0871, "step": 451, "task_loss": 1.054922103881836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.650888204574585, "epoch": 0.38, "learning_rate": 1.9103972950126796e-05, "loss": 3.1056, "step": 452, "task_loss": 1.4431328773498535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9032247066497803, "epoch": 0.38, "learning_rate": 1.914623837700761e-05, "loss": 3.6529, "step": 453, "task_loss": 1.4385548830032349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.945847988128662, "epoch": 0.38, "learning_rate": 1.918850380388842e-05, "loss": 3.3284, "step": 454, "task_loss": 1.587013602256775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.6369996070861816, "epoch": 0.38, "learning_rate": 1.923076923076923e-05, "loss": 3.0098, "step": 455, "task_loss": 1.586968183517456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.2798399925231934, "epoch": 0.39, "learning_rate": 1.9273034657650044e-05, "loss": 2.8956, "step": 456, "task_loss": 0.7911133170127869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.754023551940918, "epoch": 0.39, "learning_rate": 1.9315300084530854e-05, "loss": 3.4011, "step": 457, "task_loss": 1.368206262588501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.924030303955078, "epoch": 0.39, "learning_rate": 1.9357565511411666e-05, "loss": 2.9681, "step": 458, "task_loss": 2.0688095092773438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.331077814102173, "epoch": 0.39, "learning_rate": 1.9399830938292476e-05, "loss": 2.5793, "step": 459, "task_loss": 1.056599497795105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3401849269866943, "epoch": 0.39, "learning_rate": 1.944209636517329e-05, "loss": 2.8319, "step": 460, "task_loss": 1.7511265277862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1815004348754883, "epoch": 0.39, "learning_rate": 1.94843617920541e-05, "loss": 2.9754, "step": 461, "task_loss": 1.6878039836883545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.391164779663086, "epoch": 0.39, "learning_rate": 1.952662721893491e-05, "loss": 2.9456, "step": 462, "task_loss": 0.9921723008155823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1365957260131836, "epoch": 0.39, "learning_rate": 1.9568892645815723e-05, "loss": 3.145, "step": 463, "task_loss": 1.3737530708312988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.938772201538086, "epoch": 0.39, "learning_rate": 1.9611158072696533e-05, "loss": 2.9286, "step": 464, "task_loss": 1.3250763416290283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.35345458984375, "epoch": 0.39, "learning_rate": 1.965342349957735e-05, "loss": 2.8195, "step": 465, "task_loss": 1.4585660696029663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9780359268188477, "epoch": 0.39, "learning_rate": 1.969568892645816e-05, "loss": 3.2206, "step": 466, "task_loss": 1.4737735986709595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.478553295135498, "epoch": 0.39, "learning_rate": 1.9737954353338968e-05, "loss": 2.9322, "step": 467, "task_loss": 0.9615731835365295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.095821857452393, "epoch": 0.4, "learning_rate": 1.978021978021978e-05, "loss": 3.2791, "step": 468, "task_loss": 1.4747071266174316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.566957712173462, "epoch": 0.4, "learning_rate": 1.9822485207100593e-05, "loss": 2.9116, "step": 469, "task_loss": 0.942674994468689 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.7826738357543945, "epoch": 0.4, "learning_rate": 1.9864750633981406e-05, "loss": 3.0633, "step": 470, "task_loss": 2.468984365463257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.593606472015381, "epoch": 0.4, "learning_rate": 1.9907016060862216e-05, "loss": 3.1891, "step": 471, "task_loss": 1.514616847038269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.329005718231201, "epoch": 0.4, "learning_rate": 1.994928148774303e-05, "loss": 2.7269, "step": 472, "task_loss": 1.3711979389190674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0506515502929688, "epoch": 0.4, "learning_rate": 1.9991546914623838e-05, "loss": 3.227, "step": 473, "task_loss": 1.1492937803268433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3018617630004883, "epoch": 0.4, "learning_rate": 2.003381234150465e-05, "loss": 2.4787, "step": 474, "task_loss": 0.9843816757202148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9255075454711914, "epoch": 0.4, "learning_rate": 2.0076077768385463e-05, "loss": 2.9623, "step": 475, "task_loss": 0.9027383923530579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4689877033233643, "epoch": 0.4, "learning_rate": 2.0118343195266273e-05, "loss": 3.1238, "step": 476, "task_loss": 1.9613374471664429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.5203585624694824, "epoch": 0.4, "learning_rate": 2.0160608622147085e-05, "loss": 3.1037, "step": 477, "task_loss": 1.2278435230255127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.377819538116455, "epoch": 0.4, "learning_rate": 2.0202874049027895e-05, "loss": 2.9635, "step": 478, "task_loss": 2.3276917934417725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1640772819519043, "epoch": 0.4, "learning_rate": 2.0245139475908708e-05, "loss": 2.4336, "step": 479, "task_loss": 1.742612600326538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.5464494228363037, "epoch": 0.41, "learning_rate": 2.028740490278952e-05, "loss": 2.9199, "step": 480, "task_loss": 1.443993330001831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.154899835586548, "epoch": 0.41, "learning_rate": 2.032967032967033e-05, "loss": 3.2635, "step": 481, "task_loss": 2.3695547580718994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7708606719970703, "epoch": 0.41, "learning_rate": 2.0371935756551143e-05, "loss": 2.6829, "step": 482, "task_loss": 1.175133228302002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.400932788848877, "epoch": 0.41, "learning_rate": 2.0414201183431952e-05, "loss": 2.8959, "step": 483, "task_loss": 1.6066372394561768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8662965297698975, "epoch": 0.41, "learning_rate": 2.0456466610312765e-05, "loss": 2.7193, "step": 484, "task_loss": 1.148626685142517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8352909088134766, "epoch": 0.41, "learning_rate": 2.0498732037193578e-05, "loss": 3.2132, "step": 485, "task_loss": 1.705450177192688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.986587643623352, "epoch": 0.41, "learning_rate": 2.0540997464074387e-05, "loss": 2.4186, "step": 486, "task_loss": 1.2745615243911743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6294431686401367, "epoch": 0.41, "learning_rate": 2.05832628909552e-05, "loss": 2.4867, "step": 487, "task_loss": 1.2600382566452026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9193830490112305, "epoch": 0.41, "learning_rate": 2.062552831783601e-05, "loss": 2.7906, "step": 488, "task_loss": 0.9101977348327637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9172017574310303, "epoch": 0.41, "learning_rate": 2.0667793744716822e-05, "loss": 2.7077, "step": 489, "task_loss": 0.9316360950469971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.685241460800171, "epoch": 0.41, "learning_rate": 2.0710059171597635e-05, "loss": 2.6366, "step": 490, "task_loss": 0.6948318481445312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.376417636871338, "epoch": 0.41, "learning_rate": 2.0752324598478444e-05, "loss": 2.7532, "step": 491, "task_loss": 1.2554540634155273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.72540283203125, "epoch": 0.42, "learning_rate": 2.0794590025359257e-05, "loss": 2.729, "step": 492, "task_loss": 1.2253581285476685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.135756015777588, "epoch": 0.42, "learning_rate": 2.083685545224007e-05, "loss": 2.4648, "step": 493, "task_loss": 1.5381687879562378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.5264036655426025, "epoch": 0.42, "learning_rate": 2.0879120879120882e-05, "loss": 2.9324, "step": 494, "task_loss": 1.185011863708496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6648776531219482, "epoch": 0.42, "learning_rate": 2.0921386306001692e-05, "loss": 2.4517, "step": 495, "task_loss": 1.9634770154953003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4025936126708984, "epoch": 0.42, "learning_rate": 2.09636517328825e-05, "loss": 2.5309, "step": 496, "task_loss": 1.498427391052246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0163745880126953, "epoch": 0.42, "learning_rate": 2.1005917159763314e-05, "loss": 2.9005, "step": 497, "task_loss": 1.0271401405334473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0409889221191406, "epoch": 0.42, "learning_rate": 2.1048182586644127e-05, "loss": 2.7309, "step": 498, "task_loss": 0.8560348749160767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7729259729385376, "epoch": 0.42, "learning_rate": 2.109044801352494e-05, "loss": 2.1227, "step": 499, "task_loss": 0.7563551664352417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1902191638946533, "epoch": 0.42, "learning_rate": 2.113271344040575e-05, "loss": 2.2162, "step": 500, "task_loss": 0.3946598172187805 }, { "epoch": 0.42, "eval_accuracy": 0.7967128712871288, "eval_loss": 2.111081123352051, "eval_runtime": 226.3146, "eval_samples_per_second": 111.57, "eval_steps_per_second": 0.875, "step": 500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2274513244628906, "epoch": 0.42, "learning_rate": 2.117497886728656e-05, "loss": 2.7137, "step": 501, "task_loss": 1.7432268857955933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.308161735534668, "epoch": 0.42, "learning_rate": 2.121724429416737e-05, "loss": 2.426, "step": 502, "task_loss": 0.7575551271438599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.5511913299560547, "epoch": 0.42, "learning_rate": 2.1259509721048184e-05, "loss": 2.5577, "step": 503, "task_loss": 0.8779447674751282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0753941535949707, "epoch": 0.43, "learning_rate": 2.1301775147928997e-05, "loss": 2.4992, "step": 504, "task_loss": 1.8178752660751343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.818312168121338, "epoch": 0.43, "learning_rate": 2.1344040574809806e-05, "loss": 2.2774, "step": 505, "task_loss": 1.5198124647140503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7492883205413818, "epoch": 0.43, "learning_rate": 2.138630600169062e-05, "loss": 1.9006, "step": 506, "task_loss": 1.0318002700805664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6895010471343994, "epoch": 0.43, "learning_rate": 2.1428571428571428e-05, "loss": 2.2249, "step": 507, "task_loss": 0.3805544674396515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2279553413391113, "epoch": 0.43, "learning_rate": 2.147083685545224e-05, "loss": 2.3241, "step": 508, "task_loss": 0.5730935335159302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7193679809570312, "epoch": 0.43, "learning_rate": 2.1513102282333054e-05, "loss": 2.5365, "step": 509, "task_loss": 1.1406598091125488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.006086587905884, "epoch": 0.43, "learning_rate": 2.1555367709213863e-05, "loss": 2.5264, "step": 510, "task_loss": 1.4771238565444946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8402302265167236, "epoch": 0.43, "learning_rate": 2.1597633136094676e-05, "loss": 2.2005, "step": 511, "task_loss": 1.4359921216964722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6674861907958984, "epoch": 0.43, "learning_rate": 2.1639898562975485e-05, "loss": 2.3768, "step": 512, "task_loss": 1.1441570520401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.151594638824463, "epoch": 0.43, "learning_rate": 2.1682163989856298e-05, "loss": 2.7434, "step": 513, "task_loss": 0.9497563242912292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2260680198669434, "epoch": 0.43, "learning_rate": 2.172442941673711e-05, "loss": 2.2216, "step": 514, "task_loss": 1.5669076442718506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.330763339996338, "epoch": 0.44, "learning_rate": 2.176669484361792e-05, "loss": 2.5765, "step": 515, "task_loss": 1.5978710651397705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9076218605041504, "epoch": 0.44, "learning_rate": 2.1808960270498733e-05, "loss": 2.149, "step": 516, "task_loss": 1.35514235496521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.5301568508148193, "epoch": 0.44, "learning_rate": 2.1851225697379546e-05, "loss": 2.3057, "step": 517, "task_loss": 1.9907859563827515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.725247383117676, "epoch": 0.44, "learning_rate": 2.189349112426036e-05, "loss": 2.0454, "step": 518, "task_loss": 1.2835192680358887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3177335262298584, "epoch": 0.44, "learning_rate": 2.1935756551141168e-05, "loss": 2.0906, "step": 519, "task_loss": 1.4932316541671753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.933807849884033, "epoch": 0.44, "learning_rate": 2.1978021978021977e-05, "loss": 2.3776, "step": 520, "task_loss": 1.022037386894226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8574657440185547, "epoch": 0.44, "learning_rate": 2.202028740490279e-05, "loss": 2.5734, "step": 521, "task_loss": 1.2105145454406738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.4604203701019287, "epoch": 0.44, "learning_rate": 2.2062552831783603e-05, "loss": 2.3599, "step": 522, "task_loss": 0.8837813138961792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.145937204360962, "epoch": 0.44, "learning_rate": 2.2104818258664416e-05, "loss": 2.0186, "step": 523, "task_loss": 1.1245697736740112 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8719935417175293, "epoch": 0.44, "learning_rate": 2.2147083685545225e-05, "loss": 2.1216, "step": 524, "task_loss": 0.8426724076271057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3992369174957275, "epoch": 0.44, "learning_rate": 2.2189349112426034e-05, "loss": 2.2113, "step": 525, "task_loss": 1.3623881340026855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3700616359710693, "epoch": 0.44, "learning_rate": 2.2231614539306847e-05, "loss": 2.3038, "step": 526, "task_loss": 1.0653021335601807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.099202871322632, "epoch": 0.45, "learning_rate": 2.227387996618766e-05, "loss": 2.0201, "step": 527, "task_loss": 1.6170547008514404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.705455780029297, "epoch": 0.45, "learning_rate": 2.2316145393068473e-05, "loss": 2.1571, "step": 528, "task_loss": 1.683029294013977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6820273399353027, "epoch": 0.45, "learning_rate": 2.2358410819949282e-05, "loss": 2.3273, "step": 529, "task_loss": 1.184294581413269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.37359619140625, "epoch": 0.45, "learning_rate": 2.2400676246830095e-05, "loss": 2.2188, "step": 530, "task_loss": 1.1924149990081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7993431091308594, "epoch": 0.45, "learning_rate": 2.2442941673710904e-05, "loss": 2.256, "step": 531, "task_loss": 1.2947402000427246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.96042799949646, "epoch": 0.45, "learning_rate": 2.2485207100591717e-05, "loss": 2.0357, "step": 532, "task_loss": 1.4972035884857178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7862420082092285, "epoch": 0.45, "learning_rate": 2.252747252747253e-05, "loss": 2.3625, "step": 533, "task_loss": 1.3344647884368896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8390238285064697, "epoch": 0.45, "learning_rate": 2.256973795435334e-05, "loss": 2.1125, "step": 534, "task_loss": 1.4063631296157837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.944563388824463, "epoch": 0.45, "learning_rate": 2.2612003381234152e-05, "loss": 2.3764, "step": 535, "task_loss": 1.1201328039169312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.98451828956604, "epoch": 0.45, "learning_rate": 2.265426880811496e-05, "loss": 2.0679, "step": 536, "task_loss": 1.4023540019989014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6062114238739014, "epoch": 0.45, "learning_rate": 2.2696534234995774e-05, "loss": 2.1829, "step": 537, "task_loss": 1.1712727546691895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1873345375061035, "epoch": 0.45, "learning_rate": 2.2738799661876587e-05, "loss": 2.5371, "step": 538, "task_loss": 1.6248446702957153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9745615720748901, "epoch": 0.46, "learning_rate": 2.2781065088757396e-05, "loss": 1.7616, "step": 539, "task_loss": 1.0704917907714844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.324805736541748, "epoch": 0.46, "learning_rate": 2.282333051563821e-05, "loss": 2.2232, "step": 540, "task_loss": 1.0471283197402954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5846731662750244, "epoch": 0.46, "learning_rate": 2.286559594251902e-05, "loss": 2.2542, "step": 541, "task_loss": 1.264601469039917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.485502004623413, "epoch": 0.46, "learning_rate": 2.290786136939983e-05, "loss": 1.9869, "step": 542, "task_loss": 0.9009640216827393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8482701778411865, "epoch": 0.46, "learning_rate": 2.2950126796280644e-05, "loss": 1.8542, "step": 543, "task_loss": 1.0924466848373413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.030120372772217, "epoch": 0.46, "learning_rate": 2.2992392223161454e-05, "loss": 2.1887, "step": 544, "task_loss": 0.8587931394577026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.605255126953125, "epoch": 0.46, "learning_rate": 2.3034657650042266e-05, "loss": 2.0923, "step": 545, "task_loss": 1.2327290773391724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1006674766540527, "epoch": 0.46, "learning_rate": 2.307692307692308e-05, "loss": 2.1311, "step": 546, "task_loss": 0.4016457200050354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7875733375549316, "epoch": 0.46, "learning_rate": 2.3119188503803892e-05, "loss": 2.0668, "step": 547, "task_loss": 1.3577451705932617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.655400276184082, "epoch": 0.46, "learning_rate": 2.31614539306847e-05, "loss": 2.2127, "step": 548, "task_loss": 1.261154055595398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.652267336845398, "epoch": 0.46, "learning_rate": 2.320371935756551e-05, "loss": 1.8506, "step": 549, "task_loss": 1.6498262882232666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.518878936767578, "epoch": 0.46, "learning_rate": 2.3245984784446323e-05, "loss": 2.1833, "step": 550, "task_loss": 1.6698095798492432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.615967035293579, "epoch": 0.47, "learning_rate": 2.3288250211327136e-05, "loss": 1.6275, "step": 551, "task_loss": 1.06777822971344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8248136043548584, "epoch": 0.47, "learning_rate": 2.333051563820795e-05, "loss": 1.9895, "step": 552, "task_loss": 1.7383482456207275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9773386716842651, "epoch": 0.47, "learning_rate": 2.337278106508876e-05, "loss": 1.8523, "step": 553, "task_loss": 1.3130162954330444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0311052799224854, "epoch": 0.47, "learning_rate": 2.341504649196957e-05, "loss": 1.932, "step": 554, "task_loss": 0.8531879186630249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3066306114196777, "epoch": 0.47, "learning_rate": 2.345731191885038e-05, "loss": 2.0945, "step": 555, "task_loss": 1.1388007402420044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.182826519012451, "epoch": 0.47, "learning_rate": 2.3499577345731193e-05, "loss": 1.774, "step": 556, "task_loss": 0.8327242136001587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.70113468170166, "epoch": 0.47, "learning_rate": 2.3541842772612006e-05, "loss": 1.7191, "step": 557, "task_loss": 1.9312796592712402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.165696620941162, "epoch": 0.47, "learning_rate": 2.3584108199492815e-05, "loss": 1.7622, "step": 558, "task_loss": 1.0112472772598267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1358158588409424, "epoch": 0.47, "learning_rate": 2.3626373626373628e-05, "loss": 2.2719, "step": 559, "task_loss": 0.5982741117477417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7763363122940063, "epoch": 0.47, "learning_rate": 2.3668639053254438e-05, "loss": 1.8897, "step": 560, "task_loss": 1.7111505270004272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.694293737411499, "epoch": 0.47, "learning_rate": 2.371090448013525e-05, "loss": 1.6211, "step": 561, "task_loss": 0.759821355342865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.54893159866333, "epoch": 0.47, "learning_rate": 2.3753169907016063e-05, "loss": 2.2271, "step": 562, "task_loss": 1.163705587387085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6774253845214844, "epoch": 0.48, "learning_rate": 2.3795435333896873e-05, "loss": 1.9022, "step": 563, "task_loss": 1.1847901344299316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2768232822418213, "epoch": 0.48, "learning_rate": 2.3837700760777685e-05, "loss": 1.7219, "step": 564, "task_loss": 1.7623732089996338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1804070472717285, "epoch": 0.48, "learning_rate": 2.3879966187658495e-05, "loss": 1.8366, "step": 565, "task_loss": 1.3175655603408813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7261176109313965, "epoch": 0.48, "learning_rate": 2.3922231614539308e-05, "loss": 1.8226, "step": 566, "task_loss": 0.6513174176216125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.397911787033081, "epoch": 0.48, "learning_rate": 2.396449704142012e-05, "loss": 2.1826, "step": 567, "task_loss": 1.7065224647521973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5233944654464722, "epoch": 0.48, "learning_rate": 2.400676246830093e-05, "loss": 1.8105, "step": 568, "task_loss": 0.5079963803291321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1201059818267822, "epoch": 0.48, "learning_rate": 2.4049027895181742e-05, "loss": 2.2069, "step": 569, "task_loss": 1.40531325340271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.001434087753296, "epoch": 0.48, "learning_rate": 2.4091293322062555e-05, "loss": 1.7941, "step": 570, "task_loss": 1.075447678565979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3197951316833496, "epoch": 0.48, "learning_rate": 2.4133558748943365e-05, "loss": 1.7658, "step": 571, "task_loss": 0.7167283892631531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8534919023513794, "epoch": 0.48, "learning_rate": 2.4175824175824177e-05, "loss": 1.5157, "step": 572, "task_loss": 0.8866926431655884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.143008232116699, "epoch": 0.48, "learning_rate": 2.4218089602704987e-05, "loss": 2.0494, "step": 573, "task_loss": 1.5572212934494019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.396106481552124, "epoch": 0.48, "learning_rate": 2.42603550295858e-05, "loss": 2.0291, "step": 574, "task_loss": 1.2281885147094727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0938029289245605, "epoch": 0.49, "learning_rate": 2.4302620456466612e-05, "loss": 1.9214, "step": 575, "task_loss": 1.024495005607605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9444427490234375, "epoch": 0.49, "learning_rate": 2.4344885883347425e-05, "loss": 1.9217, "step": 576, "task_loss": 0.9162797927856445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.197317123413086, "epoch": 0.49, "learning_rate": 2.4387151310228235e-05, "loss": 1.8446, "step": 577, "task_loss": 1.2048108577728271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.508453369140625, "epoch": 0.49, "learning_rate": 2.4429416737109044e-05, "loss": 2.0515, "step": 578, "task_loss": 1.538988471031189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8542488813400269, "epoch": 0.49, "learning_rate": 2.4471682163989857e-05, "loss": 1.7664, "step": 579, "task_loss": 1.781377911567688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7428669929504395, "epoch": 0.49, "learning_rate": 2.451394759087067e-05, "loss": 2.0767, "step": 580, "task_loss": 1.3607803583145142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2964797019958496, "epoch": 0.49, "learning_rate": 2.4556213017751482e-05, "loss": 1.7215, "step": 581, "task_loss": 0.7132265567779541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6482292413711548, "epoch": 0.49, "learning_rate": 2.459847844463229e-05, "loss": 1.8297, "step": 582, "task_loss": 1.6295242309570312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6475512981414795, "epoch": 0.49, "learning_rate": 2.4640743871513104e-05, "loss": 1.832, "step": 583, "task_loss": 0.6759210824966431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9974396228790283, "epoch": 0.49, "learning_rate": 2.4683009298393914e-05, "loss": 2.1543, "step": 584, "task_loss": 1.1973074674606323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.976629376411438, "epoch": 0.49, "learning_rate": 2.4725274725274727e-05, "loss": 1.9337, "step": 585, "task_loss": 1.5170599222183228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5698022842407227, "epoch": 0.5, "learning_rate": 2.476754015215554e-05, "loss": 1.431, "step": 586, "task_loss": 0.2588977515697479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3749134540557861, "epoch": 0.5, "learning_rate": 2.480980557903635e-05, "loss": 1.5961, "step": 587, "task_loss": 0.9060823917388916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.853061318397522, "epoch": 0.5, "learning_rate": 2.485207100591716e-05, "loss": 2.0219, "step": 588, "task_loss": 2.1944754123687744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.69722580909729, "epoch": 0.5, "learning_rate": 2.489433643279797e-05, "loss": 1.9866, "step": 589, "task_loss": 1.0675206184387207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.708045482635498, "epoch": 0.5, "learning_rate": 2.4936601859678784e-05, "loss": 1.7313, "step": 590, "task_loss": 1.2448363304138184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4297287464141846, "epoch": 0.5, "learning_rate": 2.4978867286559597e-05, "loss": 1.4031, "step": 591, "task_loss": 0.7830991148948669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1071958541870117, "epoch": 0.5, "learning_rate": 2.502113271344041e-05, "loss": 1.8925, "step": 592, "task_loss": 0.7805461287498474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0951056480407715, "epoch": 0.5, "learning_rate": 2.506339814032122e-05, "loss": 1.7374, "step": 593, "task_loss": 1.2993216514587402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3728773593902588, "epoch": 0.5, "learning_rate": 2.510566356720203e-05, "loss": 1.4078, "step": 594, "task_loss": 0.7374497652053833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7998437881469727, "epoch": 0.5, "learning_rate": 2.514792899408284e-05, "loss": 1.4855, "step": 595, "task_loss": 0.5659290552139282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.62764573097229, "epoch": 0.5, "learning_rate": 2.5190194420963654e-05, "loss": 1.5772, "step": 596, "task_loss": 1.453402042388916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6215574741363525, "epoch": 0.5, "learning_rate": 2.5232459847844463e-05, "loss": 1.6867, "step": 597, "task_loss": 0.7059016227722168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3166096210479736, "epoch": 0.51, "learning_rate": 2.5274725274725276e-05, "loss": 2.0619, "step": 598, "task_loss": 2.1335737705230713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3929041624069214, "epoch": 0.51, "learning_rate": 2.5316990701606085e-05, "loss": 1.5103, "step": 599, "task_loss": 1.1276601552963257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1908389329910278, "epoch": 0.51, "learning_rate": 2.5359256128486898e-05, "loss": 1.6955, "step": 600, "task_loss": 1.167147159576416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5818207263946533, "epoch": 0.51, "learning_rate": 2.5401521555367707e-05, "loss": 2.1147, "step": 601, "task_loss": 1.7763690948486328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6887223720550537, "epoch": 0.51, "learning_rate": 2.5443786982248524e-05, "loss": 1.8799, "step": 602, "task_loss": 1.3978850841522217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7447035312652588, "epoch": 0.51, "learning_rate": 2.5486052409129336e-05, "loss": 1.4866, "step": 603, "task_loss": 0.8254601359367371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6267715692520142, "epoch": 0.51, "learning_rate": 2.5528317836010146e-05, "loss": 1.4353, "step": 604, "task_loss": 1.2943872213363647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.047501564025879, "epoch": 0.51, "learning_rate": 2.557058326289096e-05, "loss": 1.5721, "step": 605, "task_loss": 1.5931819677352905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.149880886077881, "epoch": 0.51, "learning_rate": 2.5612848689771768e-05, "loss": 1.5305, "step": 606, "task_loss": 0.8054128289222717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7630341053009033, "epoch": 0.51, "learning_rate": 2.5655114116652577e-05, "loss": 2.0451, "step": 607, "task_loss": 1.2330178022384644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7856993675231934, "epoch": 0.51, "learning_rate": 2.569737954353339e-05, "loss": 1.7132, "step": 608, "task_loss": 1.1315422058105469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.381185531616211, "epoch": 0.51, "learning_rate": 2.57396449704142e-05, "loss": 1.7236, "step": 609, "task_loss": 1.0396686792373657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9953889846801758, "epoch": 0.52, "learning_rate": 2.5781910397295012e-05, "loss": 1.5072, "step": 610, "task_loss": 1.0070775747299194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2992146015167236, "epoch": 0.52, "learning_rate": 2.582417582417583e-05, "loss": 1.3657, "step": 611, "task_loss": 1.0016437768936157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3119258880615234, "epoch": 0.52, "learning_rate": 2.5866441251056638e-05, "loss": 1.5179, "step": 612, "task_loss": 0.8120308518409729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5350935459136963, "epoch": 0.52, "learning_rate": 2.590870667793745e-05, "loss": 1.723, "step": 613, "task_loss": 1.5326266288757324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.525170087814331, "epoch": 0.52, "learning_rate": 2.595097210481826e-05, "loss": 1.8695, "step": 614, "task_loss": 0.8368762731552124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6651086807250977, "epoch": 0.52, "learning_rate": 2.5993237531699073e-05, "loss": 1.7935, "step": 615, "task_loss": 2.062633514404297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.909796953201294, "epoch": 0.52, "learning_rate": 2.6035502958579882e-05, "loss": 1.7867, "step": 616, "task_loss": 1.7751398086547852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.549363374710083, "epoch": 0.52, "learning_rate": 2.6077768385460695e-05, "loss": 1.5624, "step": 617, "task_loss": 1.7757426500320435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.892034649848938, "epoch": 0.52, "learning_rate": 2.6120033812341504e-05, "loss": 1.3632, "step": 618, "task_loss": 1.0769318342208862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.679600477218628, "epoch": 0.52, "learning_rate": 2.6162299239222317e-05, "loss": 1.6038, "step": 619, "task_loss": 0.9951533079147339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7912898063659668, "epoch": 0.52, "learning_rate": 2.6204564666103126e-05, "loss": 1.5065, "step": 620, "task_loss": 1.2363510131835938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.710914969444275, "epoch": 0.52, "learning_rate": 2.6246830092983943e-05, "loss": 1.7165, "step": 621, "task_loss": 1.1835548877716064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.003048896789551, "epoch": 0.53, "learning_rate": 2.6289095519864755e-05, "loss": 1.5246, "step": 622, "task_loss": 1.959931492805481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.295395851135254, "epoch": 0.53, "learning_rate": 2.6331360946745565e-05, "loss": 1.3173, "step": 623, "task_loss": 0.6880345940589905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0663676261901855, "epoch": 0.53, "learning_rate": 2.6373626373626374e-05, "loss": 1.612, "step": 624, "task_loss": 1.098173975944519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.634093999862671, "epoch": 0.53, "learning_rate": 2.6415891800507187e-05, "loss": 1.8847, "step": 625, "task_loss": 1.153710126876831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.4886484146118164, "epoch": 0.53, "learning_rate": 2.6458157227387996e-05, "loss": 1.8467, "step": 626, "task_loss": 1.3716340065002441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.729980230331421, "epoch": 0.53, "learning_rate": 2.650042265426881e-05, "loss": 1.5916, "step": 627, "task_loss": 0.9154810905456543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6098480224609375, "epoch": 0.53, "learning_rate": 2.654268808114962e-05, "loss": 1.578, "step": 628, "task_loss": 0.9957693815231323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.494384527206421, "epoch": 0.53, "learning_rate": 2.658495350803043e-05, "loss": 1.2136, "step": 629, "task_loss": 1.2921324968338013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7558704614639282, "epoch": 0.53, "learning_rate": 2.6627218934911247e-05, "loss": 1.5007, "step": 630, "task_loss": 1.6387611627578735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0883708000183105, "epoch": 0.53, "learning_rate": 2.6669484361792057e-05, "loss": 1.7075, "step": 631, "task_loss": 0.6809197068214417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.852921962738037, "epoch": 0.53, "learning_rate": 2.671174978867287e-05, "loss": 1.5448, "step": 632, "task_loss": 1.8660945892333984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9041959047317505, "epoch": 0.53, "learning_rate": 2.675401521555368e-05, "loss": 1.3842, "step": 633, "task_loss": 0.9388272762298584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.082602024078369, "epoch": 0.54, "learning_rate": 2.6796280642434492e-05, "loss": 1.7751, "step": 634, "task_loss": 1.0089634656906128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2200088500976562, "epoch": 0.54, "learning_rate": 2.68385460693153e-05, "loss": 1.278, "step": 635, "task_loss": 0.652645468711853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4655547142028809, "epoch": 0.54, "learning_rate": 2.688081149619611e-05, "loss": 1.5996, "step": 636, "task_loss": 1.7563261985778809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2924853563308716, "epoch": 0.54, "learning_rate": 2.6923076923076923e-05, "loss": 1.5597, "step": 637, "task_loss": 0.7756779193878174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3127920627593994, "epoch": 0.54, "learning_rate": 2.6965342349957733e-05, "loss": 1.4632, "step": 638, "task_loss": 1.6104316711425781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4918755292892456, "epoch": 0.54, "learning_rate": 2.7007607776838545e-05, "loss": 1.9172, "step": 639, "task_loss": 0.3225052058696747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.93758487701416, "epoch": 0.54, "learning_rate": 2.704987320371936e-05, "loss": 1.8743, "step": 640, "task_loss": 1.4855830669403076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1299769878387451, "epoch": 0.54, "learning_rate": 2.709213863060017e-05, "loss": 1.5022, "step": 641, "task_loss": 0.9783194065093994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5393813848495483, "epoch": 0.54, "learning_rate": 2.7134404057480984e-05, "loss": 1.7779, "step": 642, "task_loss": 1.7008614540100098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9785966873168945, "epoch": 0.54, "learning_rate": 2.7176669484361793e-05, "loss": 1.7705, "step": 643, "task_loss": 1.4615395069122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0557277202606201, "epoch": 0.54, "learning_rate": 2.7218934911242606e-05, "loss": 1.5643, "step": 644, "task_loss": 0.6099132299423218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6285157203674316, "epoch": 0.54, "learning_rate": 2.7261200338123415e-05, "loss": 1.6051, "step": 645, "task_loss": 0.8883882164955139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6315865516662598, "epoch": 0.55, "learning_rate": 2.7303465765004228e-05, "loss": 1.4772, "step": 646, "task_loss": 0.7742103338241577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8141098022460938, "epoch": 0.55, "learning_rate": 2.7345731191885038e-05, "loss": 1.3262, "step": 647, "task_loss": 1.1860442161560059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.743987798690796, "epoch": 0.55, "learning_rate": 2.738799661876585e-05, "loss": 1.7257, "step": 648, "task_loss": 0.7734543681144714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4315391778945923, "epoch": 0.55, "learning_rate": 2.743026204564666e-05, "loss": 1.2168, "step": 649, "task_loss": 0.7536091208457947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9513049125671387, "epoch": 0.55, "learning_rate": 2.7472527472527476e-05, "loss": 1.3687, "step": 650, "task_loss": 1.316762089729309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.902191162109375, "epoch": 0.55, "learning_rate": 2.751479289940829e-05, "loss": 1.7116, "step": 651, "task_loss": 1.294599175453186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4146677255630493, "epoch": 0.55, "learning_rate": 2.7557058326289098e-05, "loss": 1.0504, "step": 652, "task_loss": 0.8387738466262817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7566132545471191, "epoch": 0.55, "learning_rate": 2.7599323753169907e-05, "loss": 1.3716, "step": 653, "task_loss": 0.5043914318084717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.280056118965149, "epoch": 0.55, "learning_rate": 2.764158918005072e-05, "loss": 1.182, "step": 654, "task_loss": 0.8541724681854248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0463507175445557, "epoch": 0.55, "learning_rate": 2.768385460693153e-05, "loss": 1.4394, "step": 655, "task_loss": 0.7332131266593933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.738767385482788, "epoch": 0.55, "learning_rate": 2.7726120033812342e-05, "loss": 1.493, "step": 656, "task_loss": 1.027692437171936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.064136266708374, "epoch": 0.56, "learning_rate": 2.7768385460693152e-05, "loss": 1.2452, "step": 657, "task_loss": 0.7290031909942627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5316979885101318, "epoch": 0.56, "learning_rate": 2.7810650887573965e-05, "loss": 1.5315, "step": 658, "task_loss": 0.6630301475524902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.287901520729065, "epoch": 0.56, "learning_rate": 2.785291631445478e-05, "loss": 1.5145, "step": 659, "task_loss": 1.1887342929840088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9261849522590637, "epoch": 0.56, "learning_rate": 2.789518174133559e-05, "loss": 1.7809, "step": 660, "task_loss": 0.6622560620307922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.028679847717285, "epoch": 0.56, "learning_rate": 2.7937447168216403e-05, "loss": 1.7676, "step": 661, "task_loss": 1.0231919288635254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7194116115570068, "epoch": 0.56, "learning_rate": 2.7979712595097212e-05, "loss": 1.5794, "step": 662, "task_loss": 1.897404432296753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5506665706634521, "epoch": 0.56, "learning_rate": 2.8021978021978025e-05, "loss": 1.4879, "step": 663, "task_loss": 0.7307351231575012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4818305969238281, "epoch": 0.56, "learning_rate": 2.8064243448858834e-05, "loss": 1.5979, "step": 664, "task_loss": 0.8139563202857971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9212216138839722, "epoch": 0.56, "learning_rate": 2.8106508875739644e-05, "loss": 1.4531, "step": 665, "task_loss": 1.4810649156570435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8232896327972412, "epoch": 0.56, "learning_rate": 2.8148774302620457e-05, "loss": 1.2934, "step": 666, "task_loss": 1.1055258512496948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.690585732460022, "epoch": 0.56, "learning_rate": 2.8191039729501266e-05, "loss": 1.3457, "step": 667, "task_loss": 1.3411298990249634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6132636070251465, "epoch": 0.56, "learning_rate": 2.823330515638208e-05, "loss": 1.5371, "step": 668, "task_loss": 1.6775598526000977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5915604829788208, "epoch": 0.57, "learning_rate": 2.8275570583262895e-05, "loss": 1.3924, "step": 669, "task_loss": 1.8636835813522339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6905701160430908, "epoch": 0.57, "learning_rate": 2.8317836010143704e-05, "loss": 1.5717, "step": 670, "task_loss": 1.423853874206543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.640898585319519, "epoch": 0.57, "learning_rate": 2.8360101437024517e-05, "loss": 1.7982, "step": 671, "task_loss": 1.6273577213287354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2116680145263672, "epoch": 0.57, "learning_rate": 2.8402366863905327e-05, "loss": 1.4491, "step": 672, "task_loss": 0.6692990660667419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7327733039855957, "epoch": 0.57, "learning_rate": 2.844463229078614e-05, "loss": 1.384, "step": 673, "task_loss": 1.9309717416763306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.06645667552948, "epoch": 0.57, "learning_rate": 2.848689771766695e-05, "loss": 1.2971, "step": 674, "task_loss": 0.8362554311752319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9929766654968262, "epoch": 0.57, "learning_rate": 2.852916314454776e-05, "loss": 1.6963, "step": 675, "task_loss": 1.5742586851119995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.972877025604248, "epoch": 0.57, "learning_rate": 2.857142857142857e-05, "loss": 1.7077, "step": 676, "task_loss": 0.9387524724006653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4830821752548218, "epoch": 0.57, "learning_rate": 2.8613693998309384e-05, "loss": 1.4447, "step": 677, "task_loss": 0.6409228444099426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4077425003051758, "epoch": 0.57, "learning_rate": 2.8655959425190193e-05, "loss": 1.3521, "step": 678, "task_loss": 0.550044596195221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3166168928146362, "epoch": 0.57, "learning_rate": 2.869822485207101e-05, "loss": 1.3962, "step": 679, "task_loss": 1.3643676042556763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.506561040878296, "epoch": 0.57, "learning_rate": 2.8740490278951822e-05, "loss": 1.2574, "step": 680, "task_loss": 1.370517611503601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.212721347808838, "epoch": 0.58, "learning_rate": 2.878275570583263e-05, "loss": 1.6928, "step": 681, "task_loss": 1.0444506406784058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3113288879394531, "epoch": 0.58, "learning_rate": 2.882502113271344e-05, "loss": 1.6318, "step": 682, "task_loss": 0.5907636880874634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.969308614730835, "epoch": 0.58, "learning_rate": 2.8867286559594254e-05, "loss": 1.4938, "step": 683, "task_loss": 1.1038538217544556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1025445461273193, "epoch": 0.58, "learning_rate": 2.8909551986475063e-05, "loss": 1.1247, "step": 684, "task_loss": 1.1854864358901978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8736121654510498, "epoch": 0.58, "learning_rate": 2.8951817413355876e-05, "loss": 1.3424, "step": 685, "task_loss": 1.2612439393997192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.095592498779297, "epoch": 0.58, "learning_rate": 2.8994082840236685e-05, "loss": 1.4531, "step": 686, "task_loss": 0.6400595903396606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5747075080871582, "epoch": 0.58, "learning_rate": 2.9036348267117498e-05, "loss": 1.375, "step": 687, "task_loss": 1.830064058303833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0295298099517822, "epoch": 0.58, "learning_rate": 2.9078613693998314e-05, "loss": 1.1572, "step": 688, "task_loss": 0.9261499047279358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.271148443222046, "epoch": 0.58, "learning_rate": 2.9120879120879123e-05, "loss": 1.2934, "step": 689, "task_loss": 0.48674076795578003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.081086277961731, "epoch": 0.58, "learning_rate": 2.9163144547759936e-05, "loss": 1.4223, "step": 690, "task_loss": 0.5603989362716675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9003047943115234, "epoch": 0.58, "learning_rate": 2.9205409974640746e-05, "loss": 1.2764, "step": 691, "task_loss": 0.30261579155921936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4692645072937012, "epoch": 0.58, "learning_rate": 2.924767540152156e-05, "loss": 1.5004, "step": 692, "task_loss": 1.3463819026947021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.410404086112976, "epoch": 0.59, "learning_rate": 2.9289940828402368e-05, "loss": 1.5202, "step": 693, "task_loss": 1.6897257566452026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.476215362548828, "epoch": 0.59, "learning_rate": 2.933220625528318e-05, "loss": 1.4379, "step": 694, "task_loss": 1.4361754655838013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5503571033477783, "epoch": 0.59, "learning_rate": 2.937447168216399e-05, "loss": 1.4202, "step": 695, "task_loss": 1.408144235610962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1030468940734863, "epoch": 0.59, "learning_rate": 2.94167371090448e-05, "loss": 1.2857, "step": 696, "task_loss": 0.6506267189979553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2089228630065918, "epoch": 0.59, "learning_rate": 2.9459002535925612e-05, "loss": 1.1625, "step": 697, "task_loss": 0.8690208196640015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.834296703338623, "epoch": 0.59, "learning_rate": 2.9501267962806428e-05, "loss": 1.4272, "step": 698, "task_loss": 0.6341500878334045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3541754484176636, "epoch": 0.59, "learning_rate": 2.9543533389687238e-05, "loss": 1.3802, "step": 699, "task_loss": 0.592223584651947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2655833959579468, "epoch": 0.59, "learning_rate": 2.958579881656805e-05, "loss": 1.4258, "step": 700, "task_loss": 0.9884085059165955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2916467189788818, "epoch": 0.59, "learning_rate": 2.962806424344886e-05, "loss": 1.5177, "step": 701, "task_loss": 0.874997079372406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8093833923339844, "epoch": 0.59, "learning_rate": 2.9670329670329673e-05, "loss": 1.1025, "step": 702, "task_loss": 0.7675526142120361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3708703517913818, "epoch": 0.59, "learning_rate": 2.9712595097210482e-05, "loss": 1.3874, "step": 703, "task_loss": 1.0183649063110352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7188658714294434, "epoch": 0.59, "learning_rate": 2.9754860524091295e-05, "loss": 1.6952, "step": 704, "task_loss": 1.8258830308914185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0226402282714844, "epoch": 0.6, "learning_rate": 2.9797125950972104e-05, "loss": 1.5159, "step": 705, "task_loss": 2.2675790786743164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4809933602809906, "epoch": 0.6, "learning_rate": 2.9839391377852917e-05, "loss": 1.0833, "step": 706, "task_loss": 0.28832143545150757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.244240403175354, "epoch": 0.6, "learning_rate": 2.9881656804733733e-05, "loss": 1.4922, "step": 707, "task_loss": 1.0591614246368408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4016245603561401, "epoch": 0.6, "learning_rate": 2.9923922231614543e-05, "loss": 1.3485, "step": 708, "task_loss": 1.088834524154663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1992992162704468, "epoch": 0.6, "learning_rate": 2.9966187658495355e-05, "loss": 1.815, "step": 709, "task_loss": 0.6973622441291809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8718209862709045, "epoch": 0.6, "learning_rate": 3.0008453085376165e-05, "loss": 1.5533, "step": 710, "task_loss": 1.260893702507019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1830474138259888, "epoch": 0.6, "learning_rate": 3.0050718512256974e-05, "loss": 1.4933, "step": 711, "task_loss": 1.0625029802322388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5134824514389038, "epoch": 0.6, "learning_rate": 3.0092983939137787e-05, "loss": 1.6332, "step": 712, "task_loss": 1.1668223142623901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1905092000961304, "epoch": 0.6, "learning_rate": 3.0135249366018596e-05, "loss": 1.0176, "step": 713, "task_loss": 0.8453917503356934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7569429874420166, "epoch": 0.6, "learning_rate": 3.017751479289941e-05, "loss": 1.8035, "step": 714, "task_loss": 0.8546710014343262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1078470945358276, "epoch": 0.6, "learning_rate": 3.021978021978022e-05, "loss": 1.0731, "step": 715, "task_loss": 0.9059644341468811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.101989507675171, "epoch": 0.6, "learning_rate": 3.026204564666103e-05, "loss": 1.0711, "step": 716, "task_loss": 1.2225315570831299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.352318525314331, "epoch": 0.61, "learning_rate": 3.0304311073541847e-05, "loss": 1.3418, "step": 717, "task_loss": 1.3182826042175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1681978702545166, "epoch": 0.61, "learning_rate": 3.0346576500422657e-05, "loss": 1.2568, "step": 718, "task_loss": 0.7729310393333435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3195886611938477, "epoch": 0.61, "learning_rate": 3.038884192730347e-05, "loss": 1.3346, "step": 719, "task_loss": 1.0368680953979492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2236554622650146, "epoch": 0.61, "learning_rate": 3.043110735418428e-05, "loss": 1.551, "step": 720, "task_loss": 1.106539011001587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4266663789749146, "epoch": 0.61, "learning_rate": 3.047337278106509e-05, "loss": 1.1327, "step": 721, "task_loss": 1.776548147201538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4948453903198242, "epoch": 0.61, "learning_rate": 3.05156382079459e-05, "loss": 1.1435, "step": 722, "task_loss": 0.9376305937767029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9035033583641052, "epoch": 0.61, "learning_rate": 3.0557903634826714e-05, "loss": 1.5043, "step": 723, "task_loss": 0.5116782784461975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4848235845565796, "epoch": 0.61, "learning_rate": 3.060016906170752e-05, "loss": 1.293, "step": 724, "task_loss": 1.6778664588928223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7731674909591675, "epoch": 0.61, "learning_rate": 3.064243448858833e-05, "loss": 1.322, "step": 725, "task_loss": 1.80498206615448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0425477027893066, "epoch": 0.61, "learning_rate": 3.068469991546914e-05, "loss": 1.3337, "step": 726, "task_loss": 0.8004480600357056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2316479682922363, "epoch": 0.61, "learning_rate": 3.072696534234996e-05, "loss": 1.167, "step": 727, "task_loss": 0.9221301078796387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8982270956039429, "epoch": 0.61, "learning_rate": 3.0769230769230774e-05, "loss": 1.1234, "step": 728, "task_loss": 0.6189544796943665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0983774662017822, "epoch": 0.62, "learning_rate": 3.0811496196111584e-05, "loss": 1.3585, "step": 729, "task_loss": 1.281092643737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4596924781799316, "epoch": 0.62, "learning_rate": 3.085376162299239e-05, "loss": 1.4296, "step": 730, "task_loss": 1.1552454233169556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9852583408355713, "epoch": 0.62, "learning_rate": 3.08960270498732e-05, "loss": 1.0973, "step": 731, "task_loss": 1.1398241519927979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6978572607040405, "epoch": 0.62, "learning_rate": 3.093829247675402e-05, "loss": 1.2197, "step": 732, "task_loss": 1.0865951776504517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8431758880615234, "epoch": 0.62, "learning_rate": 3.098055790363483e-05, "loss": 1.2858, "step": 733, "task_loss": 1.8910380601882935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2466399669647217, "epoch": 0.62, "learning_rate": 3.102282333051564e-05, "loss": 1.3684, "step": 734, "task_loss": 0.8303590416908264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8037484288215637, "epoch": 0.62, "learning_rate": 3.106508875739645e-05, "loss": 1.272, "step": 735, "task_loss": 0.6720357537269592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.295682668685913, "epoch": 0.62, "learning_rate": 3.110735418427726e-05, "loss": 1.3362, "step": 736, "task_loss": 0.8334139585494995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3920183181762695, "epoch": 0.62, "learning_rate": 3.114961961115808e-05, "loss": 0.9058, "step": 737, "task_loss": 0.8491803407669067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6132968664169312, "epoch": 0.62, "learning_rate": 3.119188503803889e-05, "loss": 1.4793, "step": 738, "task_loss": 0.5197634100914001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.234397530555725, "epoch": 0.62, "learning_rate": 3.12341504649197e-05, "loss": 1.4346, "step": 739, "task_loss": 1.0549747943878174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9419648051261902, "epoch": 0.63, "learning_rate": 3.127641589180051e-05, "loss": 0.8468, "step": 740, "task_loss": 0.902725338935852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1524955034255981, "epoch": 0.63, "learning_rate": 3.131868131868132e-05, "loss": 1.1907, "step": 741, "task_loss": 0.298162043094635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7229589223861694, "epoch": 0.63, "learning_rate": 3.136094674556213e-05, "loss": 1.3349, "step": 742, "task_loss": 1.8567523956298828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5725269317626953, "epoch": 0.63, "learning_rate": 3.140321217244294e-05, "loss": 1.3297, "step": 743, "task_loss": 1.9470077753067017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5280539989471436, "epoch": 0.63, "learning_rate": 3.144547759932375e-05, "loss": 1.247, "step": 744, "task_loss": 1.9672532081604004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3714734315872192, "epoch": 0.63, "learning_rate": 3.148774302620456e-05, "loss": 1.2006, "step": 745, "task_loss": 0.6908479332923889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9341222047805786, "epoch": 0.63, "learning_rate": 3.153000845308538e-05, "loss": 1.2391, "step": 746, "task_loss": 1.6975568532943726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6665817499160767, "epoch": 0.63, "learning_rate": 3.1572273879966193e-05, "loss": 1.081, "step": 747, "task_loss": 0.3565264344215393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1943625211715698, "epoch": 0.63, "learning_rate": 3.1614539306847e-05, "loss": 1.2579, "step": 748, "task_loss": 1.6026577949523926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9157307744026184, "epoch": 0.63, "learning_rate": 3.165680473372781e-05, "loss": 1.0939, "step": 749, "task_loss": 0.6745445728302002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6101644039154053, "epoch": 0.63, "learning_rate": 3.169907016060862e-05, "loss": 1.4858, "step": 750, "task_loss": 0.7834869623184204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3131904602050781, "epoch": 0.63, "learning_rate": 3.174133558748944e-05, "loss": 1.0678, "step": 751, "task_loss": 1.1239429712295532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0606067180633545, "epoch": 0.64, "learning_rate": 3.178360101437025e-05, "loss": 1.2696, "step": 752, "task_loss": 1.0687857866287231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1304645538330078, "epoch": 0.64, "learning_rate": 3.1825866441251057e-05, "loss": 1.0809, "step": 753, "task_loss": 1.418251395225525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9744993448257446, "epoch": 0.64, "learning_rate": 3.1868131868131866e-05, "loss": 0.8588, "step": 754, "task_loss": 0.9110987782478333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9069413542747498, "epoch": 0.64, "learning_rate": 3.1910397295012675e-05, "loss": 0.8936, "step": 755, "task_loss": 0.7056465744972229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8215574026107788, "epoch": 0.64, "learning_rate": 3.195266272189349e-05, "loss": 1.0976, "step": 756, "task_loss": 0.7941426634788513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8916579484939575, "epoch": 0.64, "learning_rate": 3.199492814877431e-05, "loss": 1.5294, "step": 757, "task_loss": 1.3363746404647827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9486895799636841, "epoch": 0.64, "learning_rate": 3.203719357565512e-05, "loss": 1.0955, "step": 758, "task_loss": 0.5600053071975708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2471811771392822, "epoch": 0.64, "learning_rate": 3.2079459002535926e-05, "loss": 0.9874, "step": 759, "task_loss": 0.251995325088501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7409297227859497, "epoch": 0.64, "learning_rate": 3.2121724429416736e-05, "loss": 1.2777, "step": 760, "task_loss": 0.8875173330307007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8959306478500366, "epoch": 0.64, "learning_rate": 3.216398985629755e-05, "loss": 1.2618, "step": 761, "task_loss": 0.6971248388290405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4230185747146606, "epoch": 0.64, "learning_rate": 3.220625528317836e-05, "loss": 0.8951, "step": 762, "task_loss": 1.7325869798660278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2011255025863647, "epoch": 0.64, "learning_rate": 3.224852071005917e-05, "loss": 1.2785, "step": 763, "task_loss": 0.4973919093608856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1802830696105957, "epoch": 0.65, "learning_rate": 3.229078613693998e-05, "loss": 1.237, "step": 764, "task_loss": 1.012241005897522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.114047050476074, "epoch": 0.65, "learning_rate": 3.2333051563820796e-05, "loss": 1.533, "step": 765, "task_loss": 0.8473191857337952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2876031398773193, "epoch": 0.65, "learning_rate": 3.237531699070161e-05, "loss": 1.2606, "step": 766, "task_loss": 1.050636649131775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9813335537910461, "epoch": 0.65, "learning_rate": 3.241758241758242e-05, "loss": 1.443, "step": 767, "task_loss": 1.1777281761169434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8759397268295288, "epoch": 0.65, "learning_rate": 3.245984784446323e-05, "loss": 1.3032, "step": 768, "task_loss": 0.6305854320526123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8180917501449585, "epoch": 0.65, "learning_rate": 3.250211327134404e-05, "loss": 0.9856, "step": 769, "task_loss": 1.4123114347457886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.648061752319336, "epoch": 0.65, "learning_rate": 3.254437869822485e-05, "loss": 1.2705, "step": 770, "task_loss": 1.7551202774047852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3258916139602661, "epoch": 0.65, "learning_rate": 3.2586644125105666e-05, "loss": 1.3874, "step": 771, "task_loss": 0.671794056892395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2110213041305542, "epoch": 0.65, "learning_rate": 3.2628909551986476e-05, "loss": 0.9548, "step": 772, "task_loss": 1.1655330657958984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0431435108184814, "epoch": 0.65, "learning_rate": 3.2671174978867285e-05, "loss": 1.3146, "step": 773, "task_loss": 0.5331472158432007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0134350061416626, "epoch": 0.65, "learning_rate": 3.2713440405748094e-05, "loss": 1.0342, "step": 774, "task_loss": 1.499124526977539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9231371283531189, "epoch": 0.65, "learning_rate": 3.275570583262891e-05, "loss": 1.0063, "step": 775, "task_loss": 0.8327792882919312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5255553722381592, "epoch": 0.66, "learning_rate": 3.279797125950973e-05, "loss": 1.35, "step": 776, "task_loss": 1.2116856575012207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8038870692253113, "epoch": 0.66, "learning_rate": 3.2840236686390536e-05, "loss": 1.0083, "step": 777, "task_loss": 0.1597527265548706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3178194761276245, "epoch": 0.66, "learning_rate": 3.2882502113271346e-05, "loss": 0.9046, "step": 778, "task_loss": 1.2653892040252686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4342961311340332, "epoch": 0.66, "learning_rate": 3.2924767540152155e-05, "loss": 0.9873, "step": 779, "task_loss": 0.8460213541984558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1236512660980225, "epoch": 0.66, "learning_rate": 3.296703296703297e-05, "loss": 1.2076, "step": 780, "task_loss": 0.5499159693717957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9567474722862244, "epoch": 0.66, "learning_rate": 3.300929839391378e-05, "loss": 1.1119, "step": 781, "task_loss": 0.809646487236023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9742047786712646, "epoch": 0.66, "learning_rate": 3.305156382079459e-05, "loss": 1.0458, "step": 782, "task_loss": 1.681294560432434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9782305359840393, "epoch": 0.66, "learning_rate": 3.30938292476754e-05, "loss": 0.9076, "step": 783, "task_loss": 0.4198245704174042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5671266317367554, "epoch": 0.66, "learning_rate": 3.3136094674556215e-05, "loss": 1.1921, "step": 784, "task_loss": 0.7377972602844238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4524136781692505, "epoch": 0.66, "learning_rate": 3.317836010143703e-05, "loss": 1.0193, "step": 785, "task_loss": 1.0683938264846802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0102667808532715, "epoch": 0.66, "learning_rate": 3.322062552831784e-05, "loss": 1.1341, "step": 786, "task_loss": 0.3124377429485321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7324913740158081, "epoch": 0.66, "learning_rate": 3.326289095519865e-05, "loss": 1.1906, "step": 787, "task_loss": 0.5846706628799438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.000836730003357, "epoch": 0.67, "learning_rate": 3.330515638207946e-05, "loss": 1.1587, "step": 788, "task_loss": 0.6556212306022644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.477790355682373, "epoch": 0.67, "learning_rate": 3.334742180896027e-05, "loss": 1.1226, "step": 789, "task_loss": 0.8545069694519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1533420085906982, "epoch": 0.67, "learning_rate": 3.3389687235841085e-05, "loss": 1.1891, "step": 790, "task_loss": 1.387486219406128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1043310165405273, "epoch": 0.67, "learning_rate": 3.3431952662721895e-05, "loss": 1.3511, "step": 791, "task_loss": 0.3758786916732788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8419867753982544, "epoch": 0.67, "learning_rate": 3.3474218089602704e-05, "loss": 1.06, "step": 792, "task_loss": 0.9997478723526001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0536984205245972, "epoch": 0.67, "learning_rate": 3.3516483516483513e-05, "loss": 1.0744, "step": 793, "task_loss": 0.6990212202072144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4985753297805786, "epoch": 0.67, "learning_rate": 3.355874894336433e-05, "loss": 1.0781, "step": 794, "task_loss": 1.4334042072296143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6019961833953857, "epoch": 0.67, "learning_rate": 3.3601014370245146e-05, "loss": 1.1531, "step": 795, "task_loss": 0.6866658926010132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4568971395492554, "epoch": 0.67, "learning_rate": 3.3643279797125955e-05, "loss": 1.1843, "step": 796, "task_loss": 1.0149518251419067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6637656688690186, "epoch": 0.67, "learning_rate": 3.3685545224006765e-05, "loss": 1.2721, "step": 797, "task_loss": 1.3506348133087158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9751286506652832, "epoch": 0.67, "learning_rate": 3.3727810650887574e-05, "loss": 1.3405, "step": 798, "task_loss": 2.407045841217041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7023255228996277, "epoch": 0.67, "learning_rate": 3.377007607776838e-05, "loss": 1.1455, "step": 799, "task_loss": 0.5517940521240234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.832042932510376, "epoch": 0.68, "learning_rate": 3.38123415046492e-05, "loss": 1.1657, "step": 800, "task_loss": 0.642217218875885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6140612959861755, "epoch": 0.68, "learning_rate": 3.385460693153001e-05, "loss": 0.8946, "step": 801, "task_loss": 0.20497922599315643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8967188596725464, "epoch": 0.68, "learning_rate": 3.389687235841082e-05, "loss": 1.3106, "step": 802, "task_loss": 1.3909839391708374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.235123634338379, "epoch": 0.68, "learning_rate": 3.393913778529163e-05, "loss": 1.4335, "step": 803, "task_loss": 1.3892109394073486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1082885265350342, "epoch": 0.68, "learning_rate": 3.3981403212172444e-05, "loss": 0.9231, "step": 804, "task_loss": 1.5899536609649658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.302131175994873, "epoch": 0.68, "learning_rate": 3.402366863905326e-05, "loss": 1.1279, "step": 805, "task_loss": 0.8378443717956543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7446038722991943, "epoch": 0.68, "learning_rate": 3.406593406593407e-05, "loss": 1.2825, "step": 806, "task_loss": 1.288346767425537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5007529258728027, "epoch": 0.68, "learning_rate": 3.410819949281488e-05, "loss": 1.1223, "step": 807, "task_loss": 1.0000859498977661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.42762291431427, "epoch": 0.68, "learning_rate": 3.415046491969569e-05, "loss": 1.2839, "step": 808, "task_loss": 1.3499641418457031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.124126672744751, "epoch": 0.68, "learning_rate": 3.4192730346576504e-05, "loss": 1.1239, "step": 809, "task_loss": 1.5311522483825684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2758653163909912, "epoch": 0.68, "learning_rate": 3.4234995773457314e-05, "loss": 1.1834, "step": 810, "task_loss": 1.4165246486663818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6905515789985657, "epoch": 0.69, "learning_rate": 3.427726120033812e-05, "loss": 0.7803, "step": 811, "task_loss": 0.20684468746185303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0458539724349976, "epoch": 0.69, "learning_rate": 3.431952662721893e-05, "loss": 1.1018, "step": 812, "task_loss": 0.5593903064727783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7537253499031067, "epoch": 0.69, "learning_rate": 3.436179205409975e-05, "loss": 1.0, "step": 813, "task_loss": 0.2418084591627121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0663379430770874, "epoch": 0.69, "learning_rate": 3.4404057480980565e-05, "loss": 0.97, "step": 814, "task_loss": 1.0198121070861816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0145387649536133, "epoch": 0.69, "learning_rate": 3.4446322907861374e-05, "loss": 1.2676, "step": 815, "task_loss": 0.4640202224254608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44932273030281067, "epoch": 0.69, "learning_rate": 3.4488588334742184e-05, "loss": 0.8759, "step": 816, "task_loss": 0.36742517352104187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7345250248908997, "epoch": 0.69, "learning_rate": 3.453085376162299e-05, "loss": 0.886, "step": 817, "task_loss": 0.7532123923301697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7782611846923828, "epoch": 0.69, "learning_rate": 3.45731191885038e-05, "loss": 1.4267, "step": 818, "task_loss": 0.8026831746101379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2030867338180542, "epoch": 0.69, "learning_rate": 3.461538461538462e-05, "loss": 1.0095, "step": 819, "task_loss": 1.2915550470352173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6476097106933594, "epoch": 0.69, "learning_rate": 3.465765004226543e-05, "loss": 1.3524, "step": 820, "task_loss": 0.9218058586120605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7198113203048706, "epoch": 0.69, "learning_rate": 3.469991546914624e-05, "loss": 0.9321, "step": 821, "task_loss": 0.8357512950897217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5106363892555237, "epoch": 0.69, "learning_rate": 3.474218089602705e-05, "loss": 0.9283, "step": 822, "task_loss": 1.139270544052124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9693138599395752, "epoch": 0.7, "learning_rate": 3.478444632290786e-05, "loss": 0.9563, "step": 823, "task_loss": 0.7136973738670349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.687039852142334, "epoch": 0.7, "learning_rate": 3.482671174978868e-05, "loss": 1.2604, "step": 824, "task_loss": 1.0616068840026855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9885623455047607, "epoch": 0.7, "learning_rate": 3.486897717666949e-05, "loss": 0.8908, "step": 825, "task_loss": 0.43361592292785645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8128790855407715, "epoch": 0.7, "learning_rate": 3.49112426035503e-05, "loss": 0.9325, "step": 826, "task_loss": 0.548980176448822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3427908420562744, "epoch": 0.7, "learning_rate": 3.495350803043111e-05, "loss": 1.0781, "step": 827, "task_loss": 1.242127776145935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5811742544174194, "epoch": 0.7, "learning_rate": 3.499577345731192e-05, "loss": 1.0975, "step": 828, "task_loss": 0.5328739881515503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7572734355926514, "epoch": 0.7, "learning_rate": 3.503803888419273e-05, "loss": 1.0388, "step": 829, "task_loss": 0.5490642786026001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.299677848815918, "epoch": 0.7, "learning_rate": 3.508030431107354e-05, "loss": 1.0595, "step": 830, "task_loss": 0.9584875106811523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.072669506072998, "epoch": 0.7, "learning_rate": 3.512256973795435e-05, "loss": 1.0262, "step": 831, "task_loss": 1.5655736923217773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1121697425842285, "epoch": 0.7, "learning_rate": 3.516483516483517e-05, "loss": 0.9799, "step": 832, "task_loss": 1.260759949684143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1049669981002808, "epoch": 0.7, "learning_rate": 3.520710059171598e-05, "loss": 1.2118, "step": 833, "task_loss": 2.2879703044891357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.726709246635437, "epoch": 0.7, "learning_rate": 3.524936601859679e-05, "loss": 1.2118, "step": 834, "task_loss": 1.3654814958572388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2390391826629639, "epoch": 0.71, "learning_rate": 3.52916314454776e-05, "loss": 1.0295, "step": 835, "task_loss": 1.2186108827590942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2190589904785156, "epoch": 0.71, "learning_rate": 3.533389687235841e-05, "loss": 0.9746, "step": 836, "task_loss": 1.2877591848373413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6674307584762573, "epoch": 0.71, "learning_rate": 3.537616229923922e-05, "loss": 1.0957, "step": 837, "task_loss": 0.6909548044204712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8757596611976624, "epoch": 0.71, "learning_rate": 3.541842772612004e-05, "loss": 1.2086, "step": 838, "task_loss": 0.9716883301734924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3696362972259521, "epoch": 0.71, "learning_rate": 3.546069315300085e-05, "loss": 1.3047, "step": 839, "task_loss": 0.7878973484039307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7837753295898438, "epoch": 0.71, "learning_rate": 3.5502958579881656e-05, "loss": 0.8553, "step": 840, "task_loss": 1.2625706195831299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2305846214294434, "epoch": 0.71, "learning_rate": 3.5545224006762466e-05, "loss": 1.412, "step": 841, "task_loss": 1.0967711210250854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8645899295806885, "epoch": 0.71, "learning_rate": 3.558748943364328e-05, "loss": 1.2274, "step": 842, "task_loss": 1.1383715867996216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5731777548789978, "epoch": 0.71, "learning_rate": 3.56297548605241e-05, "loss": 1.0302, "step": 843, "task_loss": 0.36993101239204407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0389149188995361, "epoch": 0.71, "learning_rate": 3.567202028740491e-05, "loss": 1.1082, "step": 844, "task_loss": 0.39726516604423523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.57378351688385, "epoch": 0.71, "learning_rate": 3.571428571428572e-05, "loss": 1.161, "step": 845, "task_loss": 0.7472212910652161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1425508260726929, "epoch": 0.71, "learning_rate": 3.5756551141166526e-05, "loss": 0.9979, "step": 846, "task_loss": 0.523829996585846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2345768213272095, "epoch": 0.72, "learning_rate": 3.5798816568047336e-05, "loss": 1.095, "step": 847, "task_loss": 1.792392373085022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1198616027832031, "epoch": 0.72, "learning_rate": 3.584108199492815e-05, "loss": 1.1384, "step": 848, "task_loss": 1.4903634786605835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8640360832214355, "epoch": 0.72, "learning_rate": 3.588334742180896e-05, "loss": 1.1551, "step": 849, "task_loss": 0.9473032355308533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7651304602622986, "epoch": 0.72, "learning_rate": 3.592561284868977e-05, "loss": 0.8807, "step": 850, "task_loss": 1.3689067363739014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9963942766189575, "epoch": 0.72, "learning_rate": 3.596787827557058e-05, "loss": 1.1705, "step": 851, "task_loss": 1.0959937572479248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.160984992980957, "epoch": 0.72, "learning_rate": 3.6010143702451396e-05, "loss": 1.0016, "step": 852, "task_loss": 0.9334763884544373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9615375995635986, "epoch": 0.72, "learning_rate": 3.605240912933221e-05, "loss": 1.0886, "step": 853, "task_loss": 0.44973498582839966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2859203815460205, "epoch": 0.72, "learning_rate": 3.609467455621302e-05, "loss": 0.9343, "step": 854, "task_loss": 0.523995041847229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.393160343170166, "epoch": 0.72, "learning_rate": 3.613693998309383e-05, "loss": 1.1468, "step": 855, "task_loss": 1.4749865531921387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9515118598937988, "epoch": 0.72, "learning_rate": 3.617920540997464e-05, "loss": 0.886, "step": 856, "task_loss": 0.28623446822166443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1024844646453857, "epoch": 0.72, "learning_rate": 3.622147083685546e-05, "loss": 1.0377, "step": 857, "task_loss": 0.6335031390190125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0000548362731934, "epoch": 0.72, "learning_rate": 3.6263736263736266e-05, "loss": 1.1178, "step": 858, "task_loss": 1.1264042854309082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1213500499725342, "epoch": 0.73, "learning_rate": 3.6306001690617076e-05, "loss": 1.0595, "step": 859, "task_loss": 1.5383663177490234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.243680477142334, "epoch": 0.73, "learning_rate": 3.6348267117497885e-05, "loss": 0.8482, "step": 860, "task_loss": 0.4964863359928131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4609591960906982, "epoch": 0.73, "learning_rate": 3.63905325443787e-05, "loss": 1.2422, "step": 861, "task_loss": 0.44918423891067505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6599286198616028, "epoch": 0.73, "learning_rate": 3.643279797125951e-05, "loss": 0.9688, "step": 862, "task_loss": 1.1273161172866821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4101601839065552, "epoch": 0.73, "learning_rate": 3.647506339814033e-05, "loss": 1.0733, "step": 863, "task_loss": 1.6049208641052246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1310172080993652, "epoch": 0.73, "learning_rate": 3.6517328825021136e-05, "loss": 0.9334, "step": 864, "task_loss": 0.35566720366477966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8259003758430481, "epoch": 0.73, "learning_rate": 3.6559594251901945e-05, "loss": 0.8809, "step": 865, "task_loss": 1.0888447761535645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47446179389953613, "epoch": 0.73, "learning_rate": 3.6601859678782755e-05, "loss": 0.777, "step": 866, "task_loss": 0.320507675409317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8849540948867798, "epoch": 0.73, "learning_rate": 3.664412510566357e-05, "loss": 0.8602, "step": 867, "task_loss": 1.5596539974212646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9091227054595947, "epoch": 0.73, "learning_rate": 3.668639053254438e-05, "loss": 1.1435, "step": 868, "task_loss": 0.2915381193161011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7317951917648315, "epoch": 0.73, "learning_rate": 3.672865595942519e-05, "loss": 1.303, "step": 869, "task_loss": 0.8099367618560791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8608185648918152, "epoch": 0.73, "learning_rate": 3.6770921386306e-05, "loss": 1.0729, "step": 870, "task_loss": 0.7129935026168823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8221993446350098, "epoch": 0.74, "learning_rate": 3.6813186813186815e-05, "loss": 0.9044, "step": 871, "task_loss": 1.9838948249816895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6446069478988647, "epoch": 0.74, "learning_rate": 3.685545224006763e-05, "loss": 1.0964, "step": 872, "task_loss": 1.4997656345367432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7216767072677612, "epoch": 0.74, "learning_rate": 3.689771766694844e-05, "loss": 0.8302, "step": 873, "task_loss": 0.5439959764480591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8284503221511841, "epoch": 0.74, "learning_rate": 3.693998309382925e-05, "loss": 0.9574, "step": 874, "task_loss": 0.7921137809753418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8647076487541199, "epoch": 0.74, "learning_rate": 3.698224852071006e-05, "loss": 0.8352, "step": 875, "task_loss": 0.4471794366836548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0555013418197632, "epoch": 0.74, "learning_rate": 3.702451394759087e-05, "loss": 0.981, "step": 876, "task_loss": 0.7380692958831787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1514315605163574, "epoch": 0.74, "learning_rate": 3.7066779374471685e-05, "loss": 1.0922, "step": 877, "task_loss": 1.3765140771865845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9500730037689209, "epoch": 0.74, "learning_rate": 3.7109044801352495e-05, "loss": 0.9306, "step": 878, "task_loss": 0.7938254475593567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1201460361480713, "epoch": 0.74, "learning_rate": 3.7151310228233304e-05, "loss": 1.166, "step": 879, "task_loss": 0.7456966042518616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.433445692062378, "epoch": 0.74, "learning_rate": 3.7193575655114113e-05, "loss": 1.2694, "step": 880, "task_loss": 1.624867558479309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8760026693344116, "epoch": 0.74, "learning_rate": 3.723584108199493e-05, "loss": 0.8176, "step": 881, "task_loss": 0.2707850933074951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.757109522819519, "epoch": 0.75, "learning_rate": 3.7278106508875746e-05, "loss": 1.0365, "step": 882, "task_loss": 0.412381649017334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1101362705230713, "epoch": 0.75, "learning_rate": 3.7320371935756555e-05, "loss": 0.9523, "step": 883, "task_loss": 1.0132911205291748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1424742937088013, "epoch": 0.75, "learning_rate": 3.7362637362637365e-05, "loss": 1.0599, "step": 884, "task_loss": 0.4156254529953003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1723688840866089, "epoch": 0.75, "learning_rate": 3.7404902789518174e-05, "loss": 0.9552, "step": 885, "task_loss": 1.8107950687408447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9506130218505859, "epoch": 0.75, "learning_rate": 3.744716821639899e-05, "loss": 0.9215, "step": 886, "task_loss": 0.8320884108543396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7546205520629883, "epoch": 0.75, "learning_rate": 3.74894336432798e-05, "loss": 0.9466, "step": 887, "task_loss": 0.872114896774292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.229750633239746, "epoch": 0.75, "learning_rate": 3.753169907016061e-05, "loss": 0.9258, "step": 888, "task_loss": 0.6527522206306458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.007322072982788, "epoch": 0.75, "learning_rate": 3.757396449704142e-05, "loss": 0.8403, "step": 889, "task_loss": 0.7152429819107056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1415889263153076, "epoch": 0.75, "learning_rate": 3.7616229923922234e-05, "loss": 1.0371, "step": 890, "task_loss": 0.41824913024902344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.267037034034729, "epoch": 0.75, "learning_rate": 3.7658495350803044e-05, "loss": 1.0755, "step": 891, "task_loss": 1.9279865026474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9643366932868958, "epoch": 0.75, "learning_rate": 3.770076077768386e-05, "loss": 0.9367, "step": 892, "task_loss": 1.4607317447662354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7041254639625549, "epoch": 0.75, "learning_rate": 3.774302620456467e-05, "loss": 1.0253, "step": 893, "task_loss": 0.5360703468322754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1182794570922852, "epoch": 0.76, "learning_rate": 3.778529163144548e-05, "loss": 1.2526, "step": 894, "task_loss": 1.7000446319580078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.05556321144104, "epoch": 0.76, "learning_rate": 3.782755705832629e-05, "loss": 1.016, "step": 895, "task_loss": 0.13234396278858185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9342553615570068, "epoch": 0.76, "learning_rate": 3.7869822485207104e-05, "loss": 1.0886, "step": 896, "task_loss": 0.6759220957756042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8544114828109741, "epoch": 0.76, "learning_rate": 3.7912087912087914e-05, "loss": 1.1054, "step": 897, "task_loss": 1.1959929466247559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9565320014953613, "epoch": 0.76, "learning_rate": 3.795435333896872e-05, "loss": 0.9225, "step": 898, "task_loss": 0.4150441884994507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8265417218208313, "epoch": 0.76, "learning_rate": 3.799661876584953e-05, "loss": 0.9895, "step": 899, "task_loss": 1.5784776210784912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8083503246307373, "epoch": 0.76, "learning_rate": 3.803888419273035e-05, "loss": 0.9002, "step": 900, "task_loss": 0.48912513256073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.357454538345337, "epoch": 0.76, "learning_rate": 3.8081149619611165e-05, "loss": 1.0869, "step": 901, "task_loss": 1.5729515552520752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3547580242156982, "epoch": 0.76, "learning_rate": 3.8123415046491974e-05, "loss": 0.937, "step": 902, "task_loss": 1.2153193950653076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1733647584915161, "epoch": 0.76, "learning_rate": 3.8165680473372784e-05, "loss": 0.9609, "step": 903, "task_loss": 1.1576614379882812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9500135779380798, "epoch": 0.76, "learning_rate": 3.820794590025359e-05, "loss": 1.0771, "step": 904, "task_loss": 0.933475911617279 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3386025428771973, "epoch": 0.76, "learning_rate": 3.82502113271344e-05, "loss": 0.9196, "step": 905, "task_loss": 1.2426725625991821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.11505925655365, "epoch": 0.77, "learning_rate": 3.829247675401522e-05, "loss": 0.9656, "step": 906, "task_loss": 1.8728079795837402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7155083417892456, "epoch": 0.77, "learning_rate": 3.833474218089603e-05, "loss": 1.017, "step": 907, "task_loss": 0.7860233783721924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0441162586212158, "epoch": 0.77, "learning_rate": 3.837700760777684e-05, "loss": 0.9204, "step": 908, "task_loss": 2.061845302581787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8040991425514221, "epoch": 0.77, "learning_rate": 3.8419273034657653e-05, "loss": 0.9895, "step": 909, "task_loss": 0.7251245975494385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9160764217376709, "epoch": 0.77, "learning_rate": 3.846153846153846e-05, "loss": 1.1098, "step": 910, "task_loss": 0.6459576487541199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1118083000183105, "epoch": 0.77, "learning_rate": 3.850380388841928e-05, "loss": 1.0591, "step": 911, "task_loss": 1.1487478017807007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2559471130371094, "epoch": 0.77, "learning_rate": 3.854606931530009e-05, "loss": 1.2714, "step": 912, "task_loss": 0.9017682075500488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2905759811401367, "epoch": 0.77, "learning_rate": 3.85883347421809e-05, "loss": 1.0116, "step": 913, "task_loss": 1.6485753059387207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9051058292388916, "epoch": 0.77, "learning_rate": 3.863060016906171e-05, "loss": 0.9472, "step": 914, "task_loss": 1.4695974588394165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0864940881729126, "epoch": 0.77, "learning_rate": 3.867286559594252e-05, "loss": 0.7685, "step": 915, "task_loss": 1.2301139831542969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.814308762550354, "epoch": 0.77, "learning_rate": 3.871513102282333e-05, "loss": 0.9169, "step": 916, "task_loss": 0.8463487029075623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9321892857551575, "epoch": 0.77, "learning_rate": 3.875739644970414e-05, "loss": 0.7935, "step": 917, "task_loss": 1.5136678218841553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1681876182556152, "epoch": 0.78, "learning_rate": 3.879966187658495e-05, "loss": 1.0689, "step": 918, "task_loss": 1.1769527196884155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6523679494857788, "epoch": 0.78, "learning_rate": 3.884192730346577e-05, "loss": 0.9469, "step": 919, "task_loss": 0.48253172636032104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4489507675170898, "epoch": 0.78, "learning_rate": 3.888419273034658e-05, "loss": 1.0645, "step": 920, "task_loss": 0.8699784874916077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9462257027626038, "epoch": 0.78, "learning_rate": 3.892645815722739e-05, "loss": 1.0443, "step": 921, "task_loss": 1.0671677589416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6393455862998962, "epoch": 0.78, "learning_rate": 3.89687235841082e-05, "loss": 0.8468, "step": 922, "task_loss": 0.21093332767486572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0834699869155884, "epoch": 0.78, "learning_rate": 3.901098901098901e-05, "loss": 0.8719, "step": 923, "task_loss": 1.7747129201889038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9225248098373413, "epoch": 0.78, "learning_rate": 3.905325443786982e-05, "loss": 1.2102, "step": 924, "task_loss": 1.647411584854126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9872370958328247, "epoch": 0.78, "learning_rate": 3.909551986475064e-05, "loss": 1.5031, "step": 925, "task_loss": 1.4352259635925293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5590314269065857, "epoch": 0.78, "learning_rate": 3.913778529163145e-05, "loss": 0.7627, "step": 926, "task_loss": 0.7215142250061035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2236382961273193, "epoch": 0.78, "learning_rate": 3.9180050718512256e-05, "loss": 0.8242, "step": 927, "task_loss": 0.6502888202667236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9689298272132874, "epoch": 0.78, "learning_rate": 3.9222316145393066e-05, "loss": 1.0553, "step": 928, "task_loss": 0.5993413925170898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9524252414703369, "epoch": 0.78, "learning_rate": 3.926458157227388e-05, "loss": 0.9339, "step": 929, "task_loss": 0.8333476781845093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.669052243232727, "epoch": 0.79, "learning_rate": 3.93068469991547e-05, "loss": 1.0264, "step": 930, "task_loss": 1.5607829093933105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7646862268447876, "epoch": 0.79, "learning_rate": 3.934911242603551e-05, "loss": 0.904, "step": 931, "task_loss": 0.791201651096344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49209707975387573, "epoch": 0.79, "learning_rate": 3.939137785291632e-05, "loss": 0.7366, "step": 932, "task_loss": 0.27562305331230164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5928568840026855, "epoch": 0.79, "learning_rate": 3.9433643279797126e-05, "loss": 1.1911, "step": 933, "task_loss": 1.958983063697815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9044746160507202, "epoch": 0.79, "learning_rate": 3.9475908706677936e-05, "loss": 0.8933, "step": 934, "task_loss": 1.2402105331420898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3121802806854248, "epoch": 0.79, "learning_rate": 3.951817413355875e-05, "loss": 1.0862, "step": 935, "task_loss": 0.9721159338951111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8160817623138428, "epoch": 0.79, "learning_rate": 3.956043956043956e-05, "loss": 0.8266, "step": 936, "task_loss": 0.349468857049942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7713687419891357, "epoch": 0.79, "learning_rate": 3.960270498732037e-05, "loss": 1.0316, "step": 937, "task_loss": 0.5987569689750671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7616119384765625, "epoch": 0.79, "learning_rate": 3.964497041420119e-05, "loss": 0.8325, "step": 938, "task_loss": 0.6304075717926025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8960621356964111, "epoch": 0.79, "learning_rate": 3.9687235841081996e-05, "loss": 0.857, "step": 939, "task_loss": 0.47210395336151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8217711448669434, "epoch": 0.79, "learning_rate": 3.972950126796281e-05, "loss": 0.9197, "step": 940, "task_loss": 1.0479120016098022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4719734489917755, "epoch": 0.79, "learning_rate": 3.977176669484362e-05, "loss": 0.8051, "step": 941, "task_loss": 0.6363613605499268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9600677490234375, "epoch": 0.8, "learning_rate": 3.981403212172443e-05, "loss": 0.9851, "step": 942, "task_loss": 0.7804445624351501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7501757144927979, "epoch": 0.8, "learning_rate": 3.985629754860524e-05, "loss": 0.9539, "step": 943, "task_loss": 0.2040707767009735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.297065019607544, "epoch": 0.8, "learning_rate": 3.989856297548606e-05, "loss": 0.9682, "step": 944, "task_loss": 1.007348656654358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9200826287269592, "epoch": 0.8, "learning_rate": 3.9940828402366866e-05, "loss": 1.0028, "step": 945, "task_loss": 1.0636074542999268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9954100251197815, "epoch": 0.8, "learning_rate": 3.9983093829247675e-05, "loss": 0.778, "step": 946, "task_loss": 0.7820543646812439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6928896307945251, "epoch": 0.8, "learning_rate": 4.0025359256128485e-05, "loss": 0.7874, "step": 947, "task_loss": 0.8011103868484497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6247134208679199, "epoch": 0.8, "learning_rate": 4.00676246830093e-05, "loss": 0.864, "step": 948, "task_loss": 1.8927427530288696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9052202701568604, "epoch": 0.8, "learning_rate": 4.010989010989011e-05, "loss": 1.0365, "step": 949, "task_loss": 0.6095577478408813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.227888822555542, "epoch": 0.8, "learning_rate": 4.0152155536770927e-05, "loss": 0.9537, "step": 950, "task_loss": 0.2849472761154175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9246786236763, "epoch": 0.8, "learning_rate": 4.0194420963651736e-05, "loss": 0.8245, "step": 951, "task_loss": 0.7590465545654297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0755114555358887, "epoch": 0.8, "learning_rate": 4.0236686390532545e-05, "loss": 1.0702, "step": 952, "task_loss": 1.1933026313781738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5875482559204102, "epoch": 0.81, "learning_rate": 4.0278951817413355e-05, "loss": 0.6741, "step": 953, "task_loss": 0.5466034412384033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0465961694717407, "epoch": 0.81, "learning_rate": 4.032121724429417e-05, "loss": 0.9932, "step": 954, "task_loss": 1.6028212308883667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7986182570457458, "epoch": 0.81, "learning_rate": 4.036348267117498e-05, "loss": 0.7721, "step": 955, "task_loss": 0.6538143754005432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8909903764724731, "epoch": 0.81, "learning_rate": 4.040574809805579e-05, "loss": 0.9057, "step": 956, "task_loss": 0.3286559283733368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0198768377304077, "epoch": 0.81, "learning_rate": 4.0448013524936606e-05, "loss": 0.9639, "step": 957, "task_loss": 1.3309921026229858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7874588966369629, "epoch": 0.81, "learning_rate": 4.0490278951817415e-05, "loss": 0.8089, "step": 958, "task_loss": 1.1532946825027466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9630250930786133, "epoch": 0.81, "learning_rate": 4.053254437869823e-05, "loss": 1.0223, "step": 959, "task_loss": 1.046036958694458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0172003507614136, "epoch": 0.81, "learning_rate": 4.057480980557904e-05, "loss": 1.0844, "step": 960, "task_loss": 1.1492260694503784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.13735032081604, "epoch": 0.81, "learning_rate": 4.061707523245985e-05, "loss": 0.9457, "step": 961, "task_loss": 0.7386205792427063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.161307454109192, "epoch": 0.81, "learning_rate": 4.065934065934066e-05, "loss": 0.9326, "step": 962, "task_loss": 0.9093660712242126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6815217733383179, "epoch": 0.81, "learning_rate": 4.070160608622147e-05, "loss": 1.0401, "step": 963, "task_loss": 1.1925255060195923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1126519441604614, "epoch": 0.81, "learning_rate": 4.0743871513102285e-05, "loss": 0.9807, "step": 964, "task_loss": 0.3325062692165375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4261882305145264, "epoch": 0.82, "learning_rate": 4.0786136939983095e-05, "loss": 0.9518, "step": 965, "task_loss": 1.1918742656707764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8846665024757385, "epoch": 0.82, "learning_rate": 4.0828402366863904e-05, "loss": 0.7985, "step": 966, "task_loss": 1.832816481590271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0675280094146729, "epoch": 0.82, "learning_rate": 4.087066779374472e-05, "loss": 1.0258, "step": 967, "task_loss": 1.4543319940567017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.98860764503479, "epoch": 0.82, "learning_rate": 4.091293322062553e-05, "loss": 0.9265, "step": 968, "task_loss": 0.5255279541015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.047734260559082, "epoch": 0.82, "learning_rate": 4.0955198647506346e-05, "loss": 0.8803, "step": 969, "task_loss": 1.5129033327102661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6060891151428223, "epoch": 0.82, "learning_rate": 4.0997464074387155e-05, "loss": 0.7731, "step": 970, "task_loss": 0.6307033896446228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7606265544891357, "epoch": 0.82, "learning_rate": 4.1039729501267964e-05, "loss": 1.1606, "step": 971, "task_loss": 1.3341467380523682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5923593640327454, "epoch": 0.82, "learning_rate": 4.1081994928148774e-05, "loss": 0.6841, "step": 972, "task_loss": 0.3486921787261963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2537508010864258, "epoch": 0.82, "learning_rate": 4.112426035502959e-05, "loss": 0.908, "step": 973, "task_loss": 0.7147971391677856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7385191917419434, "epoch": 0.82, "learning_rate": 4.11665257819104e-05, "loss": 0.8195, "step": 974, "task_loss": 0.5330421924591064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6751447319984436, "epoch": 0.82, "learning_rate": 4.120879120879121e-05, "loss": 0.836, "step": 975, "task_loss": 0.4182318150997162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9046831130981445, "epoch": 0.82, "learning_rate": 4.125105663567202e-05, "loss": 1.0088, "step": 976, "task_loss": 0.6935864686965942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8863300085067749, "epoch": 0.83, "learning_rate": 4.1293322062552834e-05, "loss": 0.8444, "step": 977, "task_loss": 1.4850883483886719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8746967315673828, "epoch": 0.83, "learning_rate": 4.1335587489433644e-05, "loss": 0.7141, "step": 978, "task_loss": 0.9999901056289673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8823363780975342, "epoch": 0.83, "learning_rate": 4.137785291631446e-05, "loss": 0.8596, "step": 979, "task_loss": 0.43730753660202026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0419847965240479, "epoch": 0.83, "learning_rate": 4.142011834319527e-05, "loss": 0.909, "step": 980, "task_loss": 1.4584088325500488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1299464702606201, "epoch": 0.83, "learning_rate": 4.146238377007608e-05, "loss": 0.9935, "step": 981, "task_loss": 1.64651620388031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1502070426940918, "epoch": 0.83, "learning_rate": 4.150464919695689e-05, "loss": 1.0116, "step": 982, "task_loss": 0.5101893544197083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8307861089706421, "epoch": 0.83, "learning_rate": 4.1546914623837704e-05, "loss": 0.8024, "step": 983, "task_loss": 0.8191138505935669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6578338742256165, "epoch": 0.83, "learning_rate": 4.1589180050718514e-05, "loss": 1.0031, "step": 984, "task_loss": 1.0067516565322876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7545175552368164, "epoch": 0.83, "learning_rate": 4.163144547759932e-05, "loss": 0.7318, "step": 985, "task_loss": 1.4841983318328857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8482850790023804, "epoch": 0.83, "learning_rate": 4.167371090448014e-05, "loss": 1.0102, "step": 986, "task_loss": 1.2536041736602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.092016577720642, "epoch": 0.83, "learning_rate": 4.171597633136095e-05, "loss": 0.9846, "step": 987, "task_loss": 1.4091531038284302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2810094356536865, "epoch": 0.83, "learning_rate": 4.1758241758241765e-05, "loss": 1.0835, "step": 988, "task_loss": 1.0761394500732422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9180656671524048, "epoch": 0.84, "learning_rate": 4.1800507185122574e-05, "loss": 0.7987, "step": 989, "task_loss": 1.3113675117492676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2041594982147217, "epoch": 0.84, "learning_rate": 4.1842772612003383e-05, "loss": 0.9181, "step": 990, "task_loss": 1.9763338565826416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5341095924377441, "epoch": 0.84, "learning_rate": 4.188503803888419e-05, "loss": 0.9244, "step": 991, "task_loss": 1.0528119802474976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5581151843070984, "epoch": 0.84, "learning_rate": 4.1927303465765e-05, "loss": 0.6213, "step": 992, "task_loss": 1.3490724563598633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4627292156219482, "epoch": 0.84, "learning_rate": 4.196956889264582e-05, "loss": 1.0234, "step": 993, "task_loss": 0.9761104583740234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9281327724456787, "epoch": 0.84, "learning_rate": 4.201183431952663e-05, "loss": 0.9393, "step": 994, "task_loss": 1.5587248802185059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0289579629898071, "epoch": 0.84, "learning_rate": 4.205409974640744e-05, "loss": 0.9154, "step": 995, "task_loss": 0.9345096945762634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7015708684921265, "epoch": 0.84, "learning_rate": 4.209636517328825e-05, "loss": 0.8069, "step": 996, "task_loss": 0.8579267263412476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0255517959594727, "epoch": 0.84, "learning_rate": 4.213863060016906e-05, "loss": 1.0518, "step": 997, "task_loss": 1.5255379676818848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9524267911911011, "epoch": 0.84, "learning_rate": 4.218089602704988e-05, "loss": 0.8657, "step": 998, "task_loss": 1.610011100769043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.562129020690918, "epoch": 0.84, "learning_rate": 4.222316145393069e-05, "loss": 1.116, "step": 999, "task_loss": 1.4117274284362793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7052178382873535, "epoch": 0.84, "learning_rate": 4.22654268808115e-05, "loss": 0.729, "step": 1000, "task_loss": 0.9233881235122681 }, { "epoch": 0.84, "eval_accuracy": 0.8773465346534653, "eval_loss": 0.5473520755767822, "eval_runtime": 225.5364, "eval_samples_per_second": 111.955, "eval_steps_per_second": 0.878, "step": 1000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6406934261322021, "epoch": 0.85, "learning_rate": 4.230769230769231e-05, "loss": 0.8187, "step": 1001, "task_loss": 0.5166146159172058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4774363040924072, "epoch": 0.85, "learning_rate": 4.234995773457312e-05, "loss": 0.7735, "step": 1002, "task_loss": 0.5347729921340942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9131306409835815, "epoch": 0.85, "learning_rate": 4.239222316145393e-05, "loss": 0.8978, "step": 1003, "task_loss": 0.8020006418228149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9845358729362488, "epoch": 0.85, "learning_rate": 4.243448858833474e-05, "loss": 0.9645, "step": 1004, "task_loss": 0.9057111740112305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.839595079421997, "epoch": 0.85, "learning_rate": 4.247675401521555e-05, "loss": 1.0947, "step": 1005, "task_loss": 2.5043509006500244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0903139114379883, "epoch": 0.85, "learning_rate": 4.251901944209637e-05, "loss": 0.8438, "step": 1006, "task_loss": 0.21195954084396362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7785390615463257, "epoch": 0.85, "learning_rate": 4.256128486897718e-05, "loss": 0.741, "step": 1007, "task_loss": 0.5289069414138794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7664121389389038, "epoch": 0.85, "learning_rate": 4.260355029585799e-05, "loss": 0.8475, "step": 1008, "task_loss": 0.9432443380355835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5503156185150146, "epoch": 0.85, "learning_rate": 4.26458157227388e-05, "loss": 1.0528, "step": 1009, "task_loss": 1.5619062185287476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8943777084350586, "epoch": 0.85, "learning_rate": 4.268808114961961e-05, "loss": 0.8761, "step": 1010, "task_loss": 1.7858425378799438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2206605672836304, "epoch": 0.85, "learning_rate": 4.273034657650042e-05, "loss": 1.0507, "step": 1011, "task_loss": 1.4402529001235962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8293828368186951, "epoch": 0.85, "learning_rate": 4.277261200338124e-05, "loss": 0.8944, "step": 1012, "task_loss": 0.2745107412338257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7007020711898804, "epoch": 0.86, "learning_rate": 4.281487743026205e-05, "loss": 0.679, "step": 1013, "task_loss": 0.38816577196121216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9756845235824585, "epoch": 0.86, "learning_rate": 4.2857142857142856e-05, "loss": 0.8138, "step": 1014, "task_loss": 0.756329357624054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8058647513389587, "epoch": 0.86, "learning_rate": 4.289940828402367e-05, "loss": 0.8763, "step": 1015, "task_loss": 1.0434831380844116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8153285980224609, "epoch": 0.86, "learning_rate": 4.294167371090448e-05, "loss": 0.8286, "step": 1016, "task_loss": 0.8850699663162231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6842429041862488, "epoch": 0.86, "learning_rate": 4.29839391377853e-05, "loss": 0.8408, "step": 1017, "task_loss": 0.3628453314304352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7179243564605713, "epoch": 0.86, "learning_rate": 4.302620456466611e-05, "loss": 0.7081, "step": 1018, "task_loss": 1.2617570161819458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5356922149658203, "epoch": 0.86, "learning_rate": 4.306846999154692e-05, "loss": 1.1841, "step": 1019, "task_loss": 1.2176669836044312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.867803692817688, "epoch": 0.86, "learning_rate": 4.3110735418427726e-05, "loss": 0.6867, "step": 1020, "task_loss": 1.2959259748458862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6286799907684326, "epoch": 0.86, "learning_rate": 4.3153000845308536e-05, "loss": 1.1967, "step": 1021, "task_loss": 0.10641975700855255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5285118818283081, "epoch": 0.86, "learning_rate": 4.319526627218935e-05, "loss": 0.8631, "step": 1022, "task_loss": 0.09204700589179993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7077302932739258, "epoch": 0.86, "learning_rate": 4.323753169907016e-05, "loss": 0.6848, "step": 1023, "task_loss": 1.1820677518844604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7713788151741028, "epoch": 0.87, "learning_rate": 4.327979712595097e-05, "loss": 0.8865, "step": 1024, "task_loss": 0.15639527142047882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7570871710777283, "epoch": 0.87, "learning_rate": 4.332206255283179e-05, "loss": 0.7323, "step": 1025, "task_loss": 0.8542624115943909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.564191997051239, "epoch": 0.87, "learning_rate": 4.3364327979712596e-05, "loss": 0.9258, "step": 1026, "task_loss": 0.043362680822610855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7217341661453247, "epoch": 0.87, "learning_rate": 4.340659340659341e-05, "loss": 0.984, "step": 1027, "task_loss": 0.6012901067733765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9262603521347046, "epoch": 0.87, "learning_rate": 4.344885883347422e-05, "loss": 1.0491, "step": 1028, "task_loss": 1.137047529220581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6082314848899841, "epoch": 0.87, "learning_rate": 4.349112426035503e-05, "loss": 0.7795, "step": 1029, "task_loss": 0.8067362308502197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1549973487854004, "epoch": 0.87, "learning_rate": 4.353338968723584e-05, "loss": 0.9485, "step": 1030, "task_loss": 1.4903241395950317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5007762908935547, "epoch": 0.87, "learning_rate": 4.3575655114116657e-05, "loss": 1.1453, "step": 1031, "task_loss": 0.8137579560279846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1686699390411377, "epoch": 0.87, "learning_rate": 4.3617920540997466e-05, "loss": 0.8588, "step": 1032, "task_loss": 0.49630799889564514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6881077885627747, "epoch": 0.87, "learning_rate": 4.3660185967878275e-05, "loss": 1.0125, "step": 1033, "task_loss": 0.47727569937705994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.310853362083435, "epoch": 0.87, "learning_rate": 4.370245139475909e-05, "loss": 0.9761, "step": 1034, "task_loss": 1.7788794040679932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6573363542556763, "epoch": 0.87, "learning_rate": 4.37447168216399e-05, "loss": 0.7098, "step": 1035, "task_loss": 1.1031029224395752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.315794587135315, "epoch": 0.88, "learning_rate": 4.378698224852072e-05, "loss": 1.1271, "step": 1036, "task_loss": 0.8437097668647766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8488708138465881, "epoch": 0.88, "learning_rate": 4.3829247675401526e-05, "loss": 1.0503, "step": 1037, "task_loss": 1.6453142166137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8339260220527649, "epoch": 0.88, "learning_rate": 4.3871513102282336e-05, "loss": 0.9563, "step": 1038, "task_loss": 0.7176416516304016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7850699424743652, "epoch": 0.88, "learning_rate": 4.3913778529163145e-05, "loss": 0.8406, "step": 1039, "task_loss": 0.4990219175815582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.882510781288147, "epoch": 0.88, "learning_rate": 4.3956043956043955e-05, "loss": 0.9489, "step": 1040, "task_loss": 0.9199416637420654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0936996936798096, "epoch": 0.88, "learning_rate": 4.399830938292477e-05, "loss": 1.2268, "step": 1041, "task_loss": 0.9064396619796753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8860594034194946, "epoch": 0.88, "learning_rate": 4.404057480980558e-05, "loss": 0.7302, "step": 1042, "task_loss": 1.3967363834381104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7306011319160461, "epoch": 0.88, "learning_rate": 4.408284023668639e-05, "loss": 0.8238, "step": 1043, "task_loss": 0.6136924624443054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5414749383926392, "epoch": 0.88, "learning_rate": 4.4125105663567206e-05, "loss": 0.8322, "step": 1044, "task_loss": 0.5244640111923218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5917927026748657, "epoch": 0.88, "learning_rate": 4.4167371090448015e-05, "loss": 0.9212, "step": 1045, "task_loss": 0.8053513765335083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8576815128326416, "epoch": 0.88, "learning_rate": 4.420963651732883e-05, "loss": 1.2213, "step": 1046, "task_loss": 1.4971734285354614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5456199049949646, "epoch": 0.88, "learning_rate": 4.425190194420964e-05, "loss": 1.0826, "step": 1047, "task_loss": 0.2557753026485443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5806742906570435, "epoch": 0.89, "learning_rate": 4.429416737109045e-05, "loss": 0.8285, "step": 1048, "task_loss": 0.6905035972595215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7630212306976318, "epoch": 0.89, "learning_rate": 4.433643279797126e-05, "loss": 0.9844, "step": 1049, "task_loss": 1.0346499681472778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8248350024223328, "epoch": 0.89, "learning_rate": 4.437869822485207e-05, "loss": 0.9053, "step": 1050, "task_loss": 0.8613836765289307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5939721465110779, "epoch": 0.89, "learning_rate": 4.4420963651732885e-05, "loss": 0.7422, "step": 1051, "task_loss": 0.8077645897865295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.10902738571167, "epoch": 0.89, "learning_rate": 4.4463229078613694e-05, "loss": 0.9252, "step": 1052, "task_loss": 0.6429487466812134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.064831256866455, "epoch": 0.89, "learning_rate": 4.4505494505494504e-05, "loss": 0.7597, "step": 1053, "task_loss": 1.023499608039856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9962587952613831, "epoch": 0.89, "learning_rate": 4.454775993237532e-05, "loss": 0.9459, "step": 1054, "task_loss": 1.2038617134094238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6250903606414795, "epoch": 0.89, "learning_rate": 4.459002535925613e-05, "loss": 0.8021, "step": 1055, "task_loss": 0.40936997532844543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0675842761993408, "epoch": 0.89, "learning_rate": 4.4632290786136946e-05, "loss": 0.9042, "step": 1056, "task_loss": 1.2700552940368652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6769142150878906, "epoch": 0.89, "learning_rate": 4.4674556213017755e-05, "loss": 1.2055, "step": 1057, "task_loss": 1.4775896072387695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.964813768863678, "epoch": 0.89, "learning_rate": 4.4716821639898564e-05, "loss": 0.8234, "step": 1058, "task_loss": 1.5791559219360352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4713810384273529, "epoch": 0.89, "learning_rate": 4.4759087066779374e-05, "loss": 0.8379, "step": 1059, "task_loss": 0.2156589925289154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0655261278152466, "epoch": 0.9, "learning_rate": 4.480135249366019e-05, "loss": 0.9443, "step": 1060, "task_loss": 1.5356916189193726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7226281762123108, "epoch": 0.9, "learning_rate": 4.4843617920541e-05, "loss": 0.8409, "step": 1061, "task_loss": 0.6996141672134399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.153782606124878, "epoch": 0.9, "learning_rate": 4.488588334742181e-05, "loss": 0.9787, "step": 1062, "task_loss": 1.0024789571762085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.046999454498291, "epoch": 0.9, "learning_rate": 4.4928148774302625e-05, "loss": 0.8392, "step": 1063, "task_loss": 0.7635336518287659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5733722448349, "epoch": 0.9, "learning_rate": 4.4970414201183434e-05, "loss": 1.0046, "step": 1064, "task_loss": 1.8643677234649658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1650333404541016, "epoch": 0.9, "learning_rate": 4.501267962806425e-05, "loss": 0.936, "step": 1065, "task_loss": 0.7466510534286499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.282706618309021, "epoch": 0.9, "learning_rate": 4.505494505494506e-05, "loss": 1.1691, "step": 1066, "task_loss": 0.6457222700119019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5789975523948669, "epoch": 0.9, "learning_rate": 4.509721048182587e-05, "loss": 0.8922, "step": 1067, "task_loss": 0.5567877888679504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0471723079681396, "epoch": 0.9, "learning_rate": 4.513947590870668e-05, "loss": 0.7673, "step": 1068, "task_loss": 1.1186398267745972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47173628211021423, "epoch": 0.9, "learning_rate": 4.518174133558749e-05, "loss": 0.6432, "step": 1069, "task_loss": 0.9462851881980896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0246813297271729, "epoch": 0.9, "learning_rate": 4.5224006762468304e-05, "loss": 0.8928, "step": 1070, "task_loss": 0.9604721069335938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7272212505340576, "epoch": 0.9, "learning_rate": 4.5266272189349114e-05, "loss": 0.8017, "step": 1071, "task_loss": 0.6568514108657837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8164361715316772, "epoch": 0.91, "learning_rate": 4.530853761622992e-05, "loss": 0.8583, "step": 1072, "task_loss": 1.064897894859314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.719578206539154, "epoch": 0.91, "learning_rate": 4.535080304311074e-05, "loss": 0.755, "step": 1073, "task_loss": 0.6219607591629028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.050671935081482, "epoch": 0.91, "learning_rate": 4.539306846999155e-05, "loss": 0.7336, "step": 1074, "task_loss": 0.720217764377594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8910586833953857, "epoch": 0.91, "learning_rate": 4.5435333896872365e-05, "loss": 0.8277, "step": 1075, "task_loss": 0.5747786164283752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7250556349754333, "epoch": 0.91, "learning_rate": 4.5477599323753174e-05, "loss": 1.0469, "step": 1076, "task_loss": 0.589699923992157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8843887448310852, "epoch": 0.91, "learning_rate": 4.5519864750633983e-05, "loss": 0.8914, "step": 1077, "task_loss": 1.216519832611084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5351722240447998, "epoch": 0.91, "learning_rate": 4.556213017751479e-05, "loss": 0.7043, "step": 1078, "task_loss": 0.7238595485687256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7059869170188904, "epoch": 0.91, "learning_rate": 4.56043956043956e-05, "loss": 0.8144, "step": 1079, "task_loss": 0.6828386187553406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.836540699005127, "epoch": 0.91, "learning_rate": 4.564666103127642e-05, "loss": 0.9301, "step": 1080, "task_loss": 0.9157995581626892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9871584177017212, "epoch": 0.91, "learning_rate": 4.568892645815723e-05, "loss": 1.1212, "step": 1081, "task_loss": 1.2725913524627686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9317019581794739, "epoch": 0.91, "learning_rate": 4.573119188503804e-05, "loss": 0.8939, "step": 1082, "task_loss": 1.1194534301757812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5443918704986572, "epoch": 0.91, "learning_rate": 4.577345731191885e-05, "loss": 0.8874, "step": 1083, "task_loss": 0.3976791799068451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.820447564125061, "epoch": 0.92, "learning_rate": 4.581572273879966e-05, "loss": 1.0167, "step": 1084, "task_loss": 0.7069005370140076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6519954204559326, "epoch": 0.92, "learning_rate": 4.585798816568048e-05, "loss": 0.9248, "step": 1085, "task_loss": 0.8192471265792847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7855989933013916, "epoch": 0.92, "learning_rate": 4.590025359256129e-05, "loss": 0.7051, "step": 1086, "task_loss": 1.2316533327102661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6579887866973877, "epoch": 0.92, "learning_rate": 4.59425190194421e-05, "loss": 1.0256, "step": 1087, "task_loss": 1.3130455017089844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8889706134796143, "epoch": 0.92, "learning_rate": 4.598478444632291e-05, "loss": 0.8999, "step": 1088, "task_loss": 0.5759418606758118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7089442014694214, "epoch": 0.92, "learning_rate": 4.602704987320372e-05, "loss": 0.7937, "step": 1089, "task_loss": 0.7583451867103577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0801196098327637, "epoch": 0.92, "learning_rate": 4.606931530008453e-05, "loss": 0.7692, "step": 1090, "task_loss": 0.9860310554504395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6328905820846558, "epoch": 0.92, "learning_rate": 4.611158072696534e-05, "loss": 0.9594, "step": 1091, "task_loss": 1.7957305908203125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7629756331443787, "epoch": 0.92, "learning_rate": 4.615384615384616e-05, "loss": 0.8047, "step": 1092, "task_loss": 0.8766059875488281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.165700078010559, "epoch": 0.92, "learning_rate": 4.619611158072697e-05, "loss": 0.9336, "step": 1093, "task_loss": 1.229699730873108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1626949310302734, "epoch": 0.92, "learning_rate": 4.6238377007607784e-05, "loss": 0.914, "step": 1094, "task_loss": 0.44209569692611694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9383944272994995, "epoch": 0.93, "learning_rate": 4.628064243448859e-05, "loss": 0.7768, "step": 1095, "task_loss": 1.077423095703125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5795996189117432, "epoch": 0.93, "learning_rate": 4.63229078613694e-05, "loss": 0.832, "step": 1096, "task_loss": 0.1811627596616745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2577303647994995, "epoch": 0.93, "learning_rate": 4.636517328825021e-05, "loss": 0.9809, "step": 1097, "task_loss": 1.287643313407898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9835022687911987, "epoch": 0.93, "learning_rate": 4.640743871513102e-05, "loss": 0.774, "step": 1098, "task_loss": 1.102400541305542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9131743907928467, "epoch": 0.93, "learning_rate": 4.644970414201184e-05, "loss": 0.8119, "step": 1099, "task_loss": 1.4195345640182495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8345139026641846, "epoch": 0.93, "learning_rate": 4.649196956889265e-05, "loss": 0.9994, "step": 1100, "task_loss": 0.4797169268131256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7816281318664551, "epoch": 0.93, "learning_rate": 4.6534234995773456e-05, "loss": 1.0197, "step": 1101, "task_loss": 0.38640034198760986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2747520208358765, "epoch": 0.93, "learning_rate": 4.657650042265427e-05, "loss": 0.8907, "step": 1102, "task_loss": 0.864011287689209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7572222352027893, "epoch": 0.93, "learning_rate": 4.661876584953508e-05, "loss": 0.6542, "step": 1103, "task_loss": 0.21408192813396454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1232514381408691, "epoch": 0.93, "learning_rate": 4.66610312764159e-05, "loss": 0.8843, "step": 1104, "task_loss": 1.2714283466339111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1750829219818115, "epoch": 0.93, "learning_rate": 4.670329670329671e-05, "loss": 0.9257, "step": 1105, "task_loss": 1.9229350090026855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.263854742050171, "epoch": 0.93, "learning_rate": 4.674556213017752e-05, "loss": 0.9332, "step": 1106, "task_loss": 1.1648224592208862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6923394203186035, "epoch": 0.94, "learning_rate": 4.6787827557058326e-05, "loss": 0.9058, "step": 1107, "task_loss": 0.48443543910980225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2169307470321655, "epoch": 0.94, "learning_rate": 4.683009298393914e-05, "loss": 0.8957, "step": 1108, "task_loss": 1.2675312757492065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6842162013053894, "epoch": 0.94, "learning_rate": 4.687235841081995e-05, "loss": 0.8282, "step": 1109, "task_loss": 0.5703313946723938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6165573000907898, "epoch": 0.94, "learning_rate": 4.691462383770076e-05, "loss": 0.6278, "step": 1110, "task_loss": 0.684467077255249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2849135398864746, "epoch": 0.94, "learning_rate": 4.695688926458158e-05, "loss": 0.9976, "step": 1111, "task_loss": 0.7422243356704712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6567965745925903, "epoch": 0.94, "learning_rate": 4.6999154691462387e-05, "loss": 0.8592, "step": 1112, "task_loss": 1.306665062904358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8641914129257202, "epoch": 0.94, "learning_rate": 4.7041420118343196e-05, "loss": 0.8969, "step": 1113, "task_loss": 0.5412967801094055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8893709182739258, "epoch": 0.94, "learning_rate": 4.708368554522401e-05, "loss": 0.9737, "step": 1114, "task_loss": 0.8055944442749023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4296616017818451, "epoch": 0.94, "learning_rate": 4.712595097210482e-05, "loss": 0.6454, "step": 1115, "task_loss": 0.21795479953289032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6146067976951599, "epoch": 0.94, "learning_rate": 4.716821639898563e-05, "loss": 0.6804, "step": 1116, "task_loss": 0.8280937075614929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8161239624023438, "epoch": 0.94, "learning_rate": 4.721048182586644e-05, "loss": 0.8131, "step": 1117, "task_loss": 0.7271602749824524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9759347438812256, "epoch": 0.94, "learning_rate": 4.7252747252747257e-05, "loss": 0.7175, "step": 1118, "task_loss": 0.8151625990867615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6605929136276245, "epoch": 0.95, "learning_rate": 4.7295012679628066e-05, "loss": 0.7701, "step": 1119, "task_loss": 0.4892179071903229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.647374153137207, "epoch": 0.95, "learning_rate": 4.7337278106508875e-05, "loss": 0.5975, "step": 1120, "task_loss": 0.2944411337375641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2395609617233276, "epoch": 0.95, "learning_rate": 4.737954353338969e-05, "loss": 0.8506, "step": 1121, "task_loss": 1.0860813856124878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7393505573272705, "epoch": 0.95, "learning_rate": 4.74218089602705e-05, "loss": 0.8476, "step": 1122, "task_loss": 0.3383278548717499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7710299491882324, "epoch": 0.95, "learning_rate": 4.746407438715132e-05, "loss": 0.6328, "step": 1123, "task_loss": 0.5892376899719238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9416136741638184, "epoch": 0.95, "learning_rate": 4.7506339814032126e-05, "loss": 0.7723, "step": 1124, "task_loss": 0.6819658279418945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5807839632034302, "epoch": 0.95, "learning_rate": 4.7548605240912936e-05, "loss": 0.6164, "step": 1125, "task_loss": 0.669805645942688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7484465837478638, "epoch": 0.95, "learning_rate": 4.7590870667793745e-05, "loss": 0.9269, "step": 1126, "task_loss": 1.0249522924423218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7655998468399048, "epoch": 0.95, "learning_rate": 4.7633136094674555e-05, "loss": 0.7125, "step": 1127, "task_loss": 1.5309690237045288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7981546521186829, "epoch": 0.95, "learning_rate": 4.767540152155537e-05, "loss": 1.0287, "step": 1128, "task_loss": 1.3696683645248413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9472647309303284, "epoch": 0.95, "learning_rate": 4.771766694843618e-05, "loss": 0.7616, "step": 1129, "task_loss": 1.7952595949172974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9056338667869568, "epoch": 0.95, "learning_rate": 4.775993237531699e-05, "loss": 0.8231, "step": 1130, "task_loss": 1.6249325275421143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34798866510391235, "epoch": 0.96, "learning_rate": 4.7802197802197806e-05, "loss": 0.6802, "step": 1131, "task_loss": 0.08991029858589172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7035545110702515, "epoch": 0.96, "learning_rate": 4.7844463229078615e-05, "loss": 0.8946, "step": 1132, "task_loss": 0.3620370328426361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.615001380443573, "epoch": 0.96, "learning_rate": 4.788672865595943e-05, "loss": 0.8414, "step": 1133, "task_loss": 1.0941662788391113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7890470027923584, "epoch": 0.96, "learning_rate": 4.792899408284024e-05, "loss": 0.9335, "step": 1134, "task_loss": 0.6290927529335022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2558977603912354, "epoch": 0.96, "learning_rate": 4.797125950972105e-05, "loss": 1.0801, "step": 1135, "task_loss": 1.8750083446502686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.688927412033081, "epoch": 0.96, "learning_rate": 4.801352493660186e-05, "loss": 0.8891, "step": 1136, "task_loss": 1.0981426239013672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7226504683494568, "epoch": 0.96, "learning_rate": 4.8055790363482676e-05, "loss": 0.8138, "step": 1137, "task_loss": 0.9106653332710266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8440982699394226, "epoch": 0.96, "learning_rate": 4.8098055790363485e-05, "loss": 0.6944, "step": 1138, "task_loss": 0.4545222222805023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.620614230632782, "epoch": 0.96, "learning_rate": 4.8140321217244294e-05, "loss": 1.0279, "step": 1139, "task_loss": 1.1348025798797607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3493432998657227, "epoch": 0.96, "learning_rate": 4.818258664412511e-05, "loss": 0.9578, "step": 1140, "task_loss": 0.4679960608482361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.979732871055603, "epoch": 0.96, "learning_rate": 4.822485207100592e-05, "loss": 0.8323, "step": 1141, "task_loss": 1.0672962665557861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7768120765686035, "epoch": 0.96, "learning_rate": 4.826711749788673e-05, "loss": 0.8236, "step": 1142, "task_loss": 1.4994726181030273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8544281125068665, "epoch": 0.97, "learning_rate": 4.8309382924767545e-05, "loss": 0.8363, "step": 1143, "task_loss": 0.6631637215614319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.110274076461792, "epoch": 0.97, "learning_rate": 4.8351648351648355e-05, "loss": 0.8542, "step": 1144, "task_loss": 0.9147529602050781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5786669850349426, "epoch": 0.97, "learning_rate": 4.8393913778529164e-05, "loss": 0.5914, "step": 1145, "task_loss": 0.3172459304332733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6511435508728027, "epoch": 0.97, "learning_rate": 4.8436179205409974e-05, "loss": 0.9974, "step": 1146, "task_loss": 1.5227354764938354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5638840198516846, "epoch": 0.97, "learning_rate": 4.847844463229079e-05, "loss": 0.7396, "step": 1147, "task_loss": 0.3138814866542816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7771573662757874, "epoch": 0.97, "learning_rate": 4.85207100591716e-05, "loss": 0.7953, "step": 1148, "task_loss": 0.6273994445800781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9929516315460205, "epoch": 0.97, "learning_rate": 4.856297548605241e-05, "loss": 0.8556, "step": 1149, "task_loss": 1.468699336051941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0271494388580322, "epoch": 0.97, "learning_rate": 4.8605240912933225e-05, "loss": 0.7325, "step": 1150, "task_loss": 1.3612909317016602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.724450409412384, "epoch": 0.97, "learning_rate": 4.8647506339814034e-05, "loss": 0.6754, "step": 1151, "task_loss": 0.9793192744255066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0492002964019775, "epoch": 0.97, "learning_rate": 4.868977176669485e-05, "loss": 0.7911, "step": 1152, "task_loss": 0.6451606154441833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5660459995269775, "epoch": 0.97, "learning_rate": 4.873203719357566e-05, "loss": 0.7395, "step": 1153, "task_loss": 0.8183006048202515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.474632203578949, "epoch": 0.97, "learning_rate": 4.877430262045647e-05, "loss": 0.8298, "step": 1154, "task_loss": 0.5101319551467896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9735820293426514, "epoch": 0.98, "learning_rate": 4.881656804733728e-05, "loss": 0.8301, "step": 1155, "task_loss": 1.6329922676086426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6661190390586853, "epoch": 0.98, "learning_rate": 4.885883347421809e-05, "loss": 0.7396, "step": 1156, "task_loss": 0.5688637495040894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6823300719261169, "epoch": 0.98, "learning_rate": 4.8901098901098904e-05, "loss": 0.6999, "step": 1157, "task_loss": 0.5251001715660095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7705098986625671, "epoch": 0.98, "learning_rate": 4.8943364327979713e-05, "loss": 0.7691, "step": 1158, "task_loss": 0.4206394553184509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6189604997634888, "epoch": 0.98, "learning_rate": 4.898562975486053e-05, "loss": 0.8023, "step": 1159, "task_loss": 0.5048073530197144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6373060941696167, "epoch": 0.98, "learning_rate": 4.902789518174134e-05, "loss": 0.8135, "step": 1160, "task_loss": 0.6829380393028259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7377821207046509, "epoch": 0.98, "learning_rate": 4.907016060862215e-05, "loss": 0.8444, "step": 1161, "task_loss": 0.37326258420944214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0954371690750122, "epoch": 0.98, "learning_rate": 4.9112426035502965e-05, "loss": 0.9697, "step": 1162, "task_loss": 1.023380994796753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.280561089515686, "epoch": 0.98, "learning_rate": 4.9154691462383774e-05, "loss": 1.0147, "step": 1163, "task_loss": 1.2736636400222778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6554126739501953, "epoch": 0.98, "learning_rate": 4.919695688926458e-05, "loss": 0.768, "step": 1164, "task_loss": 1.1903795003890991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7842713594436646, "epoch": 0.98, "learning_rate": 4.923922231614539e-05, "loss": 0.7761, "step": 1165, "task_loss": 0.7038989067077637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.257037878036499, "epoch": 0.99, "learning_rate": 4.928148774302621e-05, "loss": 0.9201, "step": 1166, "task_loss": 0.7914219498634338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6979518532752991, "epoch": 0.99, "learning_rate": 4.932375316990702e-05, "loss": 0.7001, "step": 1167, "task_loss": 0.6382023692131042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0545845031738281, "epoch": 0.99, "learning_rate": 4.936601859678783e-05, "loss": 0.8437, "step": 1168, "task_loss": 1.552354097366333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0328158140182495, "epoch": 0.99, "learning_rate": 4.9408284023668644e-05, "loss": 0.8322, "step": 1169, "task_loss": 0.7564343214035034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0396249294281006, "epoch": 0.99, "learning_rate": 4.945054945054945e-05, "loss": 0.6618, "step": 1170, "task_loss": 0.4696506857872009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7203500270843506, "epoch": 0.99, "learning_rate": 4.949281487743026e-05, "loss": 0.6808, "step": 1171, "task_loss": 0.1880689561367035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6670799255371094, "epoch": 0.99, "learning_rate": 4.953508030431108e-05, "loss": 0.8896, "step": 1172, "task_loss": 1.6696733236312866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6051375865936279, "epoch": 0.99, "learning_rate": 4.957734573119189e-05, "loss": 0.834, "step": 1173, "task_loss": 0.1660824567079544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8299570083618164, "epoch": 0.99, "learning_rate": 4.96196111580727e-05, "loss": 0.8554, "step": 1174, "task_loss": 0.918289840221405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3003058433532715, "epoch": 0.99, "learning_rate": 4.966187658495351e-05, "loss": 1.0372, "step": 1175, "task_loss": 1.075036883354187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0989793539047241, "epoch": 0.99, "learning_rate": 4.970414201183432e-05, "loss": 0.8448, "step": 1176, "task_loss": 0.5859171748161316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47512906789779663, "epoch": 0.99, "learning_rate": 4.974640743871513e-05, "loss": 0.5496, "step": 1177, "task_loss": 0.6971918344497681 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2151539325714111, "epoch": 1.0, "learning_rate": 4.978867286559594e-05, "loss": 1.2207, "step": 1178, "task_loss": 1.0740293264389038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6540042161941528, "epoch": 1.0, "learning_rate": 4.983093829247676e-05, "loss": 0.799, "step": 1179, "task_loss": 1.4554388523101807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6209936738014221, "epoch": 1.0, "learning_rate": 4.987320371935757e-05, "loss": 0.6405, "step": 1180, "task_loss": 0.6393856406211853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6591631174087524, "epoch": 1.0, "learning_rate": 4.9915469146238384e-05, "loss": 0.8254, "step": 1181, "task_loss": 1.6942919492721558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6992000341415405, "epoch": 1.0, "learning_rate": 4.995773457311919e-05, "loss": 0.7312, "step": 1182, "task_loss": 0.2321470081806183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9188761115074158, "epoch": 1.0, "learning_rate": 5e-05, "loss": 0.8403, "step": 1183, "task_loss": 0.9889511466026306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0842434167861938, "epoch": 1.0, "learning_rate": 4.999530384145769e-05, "loss": 1.4113, "step": 1184, "task_loss": 0.37364462018013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5621779561042786, "epoch": 1.0, "learning_rate": 4.999060768291538e-05, "loss": 0.7831, "step": 1185, "task_loss": 0.5005952715873718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.862773060798645, "epoch": 1.0, "learning_rate": 4.998591152437306e-05, "loss": 0.7874, "step": 1186, "task_loss": 2.0639700889587402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8259677290916443, "epoch": 1.0, "learning_rate": 4.9981215365830755e-05, "loss": 0.8546, "step": 1187, "task_loss": 1.1613569259643555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6405386924743652, "epoch": 1.0, "learning_rate": 4.997651920728844e-05, "loss": 0.8474, "step": 1188, "task_loss": 0.925140380859375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1644678115844727, "epoch": 1.01, "learning_rate": 4.997182304874613e-05, "loss": 0.7357, "step": 1189, "task_loss": 0.893349289894104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7068578600883484, "epoch": 1.01, "learning_rate": 4.9967126890203814e-05, "loss": 0.7144, "step": 1190, "task_loss": 0.41942262649536133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.957436203956604, "epoch": 1.01, "learning_rate": 4.99624307316615e-05, "loss": 0.8093, "step": 1191, "task_loss": 1.1181063652038574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6972085237503052, "epoch": 1.01, "learning_rate": 4.995773457311919e-05, "loss": 0.6451, "step": 1192, "task_loss": 0.5754778981208801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5771447420120239, "epoch": 1.01, "learning_rate": 4.995303841457688e-05, "loss": 0.7266, "step": 1193, "task_loss": 0.41850048303604126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7822365164756775, "epoch": 1.01, "learning_rate": 4.9948342256034566e-05, "loss": 0.8508, "step": 1194, "task_loss": 0.8899636268615723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6295628547668457, "epoch": 1.01, "learning_rate": 4.994364609749225e-05, "loss": 0.7655, "step": 1195, "task_loss": 0.5817393064498901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.761860191822052, "epoch": 1.01, "learning_rate": 4.993894993894994e-05, "loss": 0.8105, "step": 1196, "task_loss": 0.30530163645744324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2623413801193237, "epoch": 1.01, "learning_rate": 4.993425378040763e-05, "loss": 0.817, "step": 1197, "task_loss": 0.8808668851852417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.639186680316925, "epoch": 1.01, "learning_rate": 4.992955762186531e-05, "loss": 0.6714, "step": 1198, "task_loss": 0.9050926566123962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5778490304946899, "epoch": 1.01, "learning_rate": 4.9924861463323004e-05, "loss": 0.4984, "step": 1199, "task_loss": 0.4535747766494751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3414039611816406, "epoch": 1.01, "learning_rate": 4.992016530478069e-05, "loss": 0.9369, "step": 1200, "task_loss": 0.43961501121520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8718283772468567, "epoch": 1.02, "learning_rate": 4.9915469146238384e-05, "loss": 0.8468, "step": 1201, "task_loss": 0.5481698513031006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7717528343200684, "epoch": 1.02, "learning_rate": 4.991077298769607e-05, "loss": 0.7422, "step": 1202, "task_loss": 0.5219957828521729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5771570205688477, "epoch": 1.02, "learning_rate": 4.990607682915375e-05, "loss": 0.6899, "step": 1203, "task_loss": 0.576032280921936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9418631792068481, "epoch": 1.02, "learning_rate": 4.990138067061144e-05, "loss": 0.7849, "step": 1204, "task_loss": 1.1170276403427124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0152599811553955, "epoch": 1.02, "learning_rate": 4.989668451206913e-05, "loss": 0.8873, "step": 1205, "task_loss": 0.7837698459625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6333853006362915, "epoch": 1.02, "learning_rate": 4.989198835352682e-05, "loss": 0.9868, "step": 1206, "task_loss": 1.4106652736663818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4627916812896729, "epoch": 1.02, "learning_rate": 4.98872921949845e-05, "loss": 0.9593, "step": 1207, "task_loss": 0.8392561674118042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9550553560256958, "epoch": 1.02, "learning_rate": 4.9882596036442195e-05, "loss": 1.0316, "step": 1208, "task_loss": 1.444199562072754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6082151532173157, "epoch": 1.02, "learning_rate": 4.987789987789988e-05, "loss": 0.5874, "step": 1209, "task_loss": 0.9063596129417419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.855522632598877, "epoch": 1.02, "learning_rate": 4.987320371935757e-05, "loss": 0.8887, "step": 1210, "task_loss": 1.5732998847961426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8228781223297119, "epoch": 1.02, "learning_rate": 4.9868507560815254e-05, "loss": 0.9071, "step": 1211, "task_loss": 1.395807147026062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0297553539276123, "epoch": 1.02, "learning_rate": 4.986381140227294e-05, "loss": 0.8536, "step": 1212, "task_loss": 1.552739143371582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.588183581829071, "epoch": 1.03, "learning_rate": 4.985911524373063e-05, "loss": 0.6175, "step": 1213, "task_loss": 0.5156790018081665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5474134683609009, "epoch": 1.03, "learning_rate": 4.985441908518832e-05, "loss": 0.6986, "step": 1214, "task_loss": 0.4322492480278015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8742802143096924, "epoch": 1.03, "learning_rate": 4.9849722926646006e-05, "loss": 0.6869, "step": 1215, "task_loss": 1.436391830444336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.799309253692627, "epoch": 1.03, "learning_rate": 4.984502676810369e-05, "loss": 0.7392, "step": 1216, "task_loss": 0.8387566208839417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6824354529380798, "epoch": 1.03, "learning_rate": 4.984033060956138e-05, "loss": 0.8001, "step": 1217, "task_loss": 0.3127535879611969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8048501014709473, "epoch": 1.03, "learning_rate": 4.983563445101907e-05, "loss": 0.6682, "step": 1218, "task_loss": 2.843467950820923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8077281713485718, "epoch": 1.03, "learning_rate": 4.983093829247676e-05, "loss": 0.7088, "step": 1219, "task_loss": 0.665897011756897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7415891289710999, "epoch": 1.03, "learning_rate": 4.9826242133934444e-05, "loss": 0.7238, "step": 1220, "task_loss": 0.7219927906990051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8160539865493774, "epoch": 1.03, "learning_rate": 4.982154597539213e-05, "loss": 0.6462, "step": 1221, "task_loss": 1.4054220914840698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.668685793876648, "epoch": 1.03, "learning_rate": 4.981684981684982e-05, "loss": 0.6602, "step": 1222, "task_loss": 1.0342191457748413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.682410478591919, "epoch": 1.03, "learning_rate": 4.981215365830751e-05, "loss": 0.5428, "step": 1223, "task_loss": 0.20911963284015656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9190638065338135, "epoch": 1.03, "learning_rate": 4.980745749976519e-05, "loss": 0.8843, "step": 1224, "task_loss": 1.6182515621185303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37287187576293945, "epoch": 1.04, "learning_rate": 4.980276134122288e-05, "loss": 0.5792, "step": 1225, "task_loss": 0.47068265080451965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49221426248550415, "epoch": 1.04, "learning_rate": 4.979806518268057e-05, "loss": 0.7815, "step": 1226, "task_loss": 0.3649473488330841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7229851484298706, "epoch": 1.04, "learning_rate": 4.9793369024138256e-05, "loss": 0.7538, "step": 1227, "task_loss": 0.8756842017173767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1795562505722046, "epoch": 1.04, "learning_rate": 4.978867286559594e-05, "loss": 0.8269, "step": 1228, "task_loss": 1.4452028274536133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5553463697433472, "epoch": 1.04, "learning_rate": 4.978397670705363e-05, "loss": 0.6602, "step": 1229, "task_loss": 0.47743508219718933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5887724161148071, "epoch": 1.04, "learning_rate": 4.977928054851132e-05, "loss": 0.7539, "step": 1230, "task_loss": 0.6411767601966858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7629682421684265, "epoch": 1.04, "learning_rate": 4.977458438996901e-05, "loss": 0.7349, "step": 1231, "task_loss": 0.49180716276168823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6320040225982666, "epoch": 1.04, "learning_rate": 4.97698882314267e-05, "loss": 0.5702, "step": 1232, "task_loss": 0.6804923415184021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7626141905784607, "epoch": 1.04, "learning_rate": 4.976519207288438e-05, "loss": 0.6849, "step": 1233, "task_loss": 1.6899019479751587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7891063094139099, "epoch": 1.04, "learning_rate": 4.9760495914342073e-05, "loss": 0.861, "step": 1234, "task_loss": 0.8349094390869141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6773739457130432, "epoch": 1.04, "learning_rate": 4.975579975579976e-05, "loss": 0.67, "step": 1235, "task_loss": 1.3414201736450195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5630843639373779, "epoch": 1.04, "learning_rate": 4.9751103597257446e-05, "loss": 0.6635, "step": 1236, "task_loss": 1.2682299613952637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6468501687049866, "epoch": 1.05, "learning_rate": 4.974640743871513e-05, "loss": 0.6584, "step": 1237, "task_loss": 1.2017923593521118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.523918867111206, "epoch": 1.05, "learning_rate": 4.974171128017282e-05, "loss": 1.0787, "step": 1238, "task_loss": 1.4862427711486816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.502358078956604, "epoch": 1.05, "learning_rate": 4.973701512163051e-05, "loss": 0.9003, "step": 1239, "task_loss": 0.12119689583778381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5020929574966431, "epoch": 1.05, "learning_rate": 4.97323189630882e-05, "loss": 0.5841, "step": 1240, "task_loss": 0.4629811644554138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43849024176597595, "epoch": 1.05, "learning_rate": 4.9727622804545885e-05, "loss": 0.6161, "step": 1241, "task_loss": 0.04425988718867302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.686779260635376, "epoch": 1.05, "learning_rate": 4.972292664600357e-05, "loss": 0.6894, "step": 1242, "task_loss": 0.680210530757904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6000853776931763, "epoch": 1.05, "learning_rate": 4.971823048746126e-05, "loss": 0.613, "step": 1243, "task_loss": 1.1395777463912964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5759631395339966, "epoch": 1.05, "learning_rate": 4.971353432891895e-05, "loss": 0.5621, "step": 1244, "task_loss": 0.5371601581573486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9586961269378662, "epoch": 1.05, "learning_rate": 4.970883817037664e-05, "loss": 0.7669, "step": 1245, "task_loss": 1.5068578720092773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45411062240600586, "epoch": 1.05, "learning_rate": 4.970414201183432e-05, "loss": 0.7323, "step": 1246, "task_loss": 0.7242676019668579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5762771368026733, "epoch": 1.05, "learning_rate": 4.969944585329201e-05, "loss": 0.8685, "step": 1247, "task_loss": 0.16207104921340942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.689229428768158, "epoch": 1.05, "learning_rate": 4.9694749694749696e-05, "loss": 0.7336, "step": 1248, "task_loss": 0.5991546511650085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0700457096099854, "epoch": 1.06, "learning_rate": 4.969005353620739e-05, "loss": 0.7709, "step": 1249, "task_loss": 1.1560986042022705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7261964082717896, "epoch": 1.06, "learning_rate": 4.968535737766507e-05, "loss": 0.8481, "step": 1250, "task_loss": 1.3103338479995728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7856221795082092, "epoch": 1.06, "learning_rate": 4.968066121912276e-05, "loss": 0.7287, "step": 1251, "task_loss": 0.7951094508171082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6956194639205933, "epoch": 1.06, "learning_rate": 4.967596506058045e-05, "loss": 0.7107, "step": 1252, "task_loss": 0.8282170295715332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7161049842834473, "epoch": 1.06, "learning_rate": 4.9671268902038134e-05, "loss": 0.7592, "step": 1253, "task_loss": 1.6762678623199463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6723149418830872, "epoch": 1.06, "learning_rate": 4.966657274349582e-05, "loss": 0.6999, "step": 1254, "task_loss": 1.400100588798523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6626062393188477, "epoch": 1.06, "learning_rate": 4.966187658495351e-05, "loss": 0.6448, "step": 1255, "task_loss": 0.15099988877773285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6046366691589355, "epoch": 1.06, "learning_rate": 4.96571804264112e-05, "loss": 0.5889, "step": 1256, "task_loss": 0.67034912109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4277357757091522, "epoch": 1.06, "learning_rate": 4.9652484267868886e-05, "loss": 0.5749, "step": 1257, "task_loss": 0.29017412662506104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7453477382659912, "epoch": 1.06, "learning_rate": 4.964778810932657e-05, "loss": 0.6966, "step": 1258, "task_loss": 1.4840214252471924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4733334481716156, "epoch": 1.06, "learning_rate": 4.964309195078426e-05, "loss": 0.5383, "step": 1259, "task_loss": 0.06836415827274323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8039414286613464, "epoch": 1.07, "learning_rate": 4.9638395792241945e-05, "loss": 0.924, "step": 1260, "task_loss": 0.5936144590377808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.525248646736145, "epoch": 1.07, "learning_rate": 4.963369963369964e-05, "loss": 0.611, "step": 1261, "task_loss": 1.0651646852493286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4840173125267029, "epoch": 1.07, "learning_rate": 4.9629003475157325e-05, "loss": 0.6042, "step": 1262, "task_loss": 0.754069983959198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5537488460540771, "epoch": 1.07, "learning_rate": 4.962430731661501e-05, "loss": 0.7813, "step": 1263, "task_loss": 0.8854331970214844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9403806328773499, "epoch": 1.07, "learning_rate": 4.96196111580727e-05, "loss": 0.9467, "step": 1264, "task_loss": 1.581176519393921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6699857711791992, "epoch": 1.07, "learning_rate": 4.961491499953039e-05, "loss": 0.6751, "step": 1265, "task_loss": 0.7598257064819336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8175055980682373, "epoch": 1.07, "learning_rate": 4.961021884098808e-05, "loss": 0.8366, "step": 1266, "task_loss": 0.8847934007644653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7916966676712036, "epoch": 1.07, "learning_rate": 4.9605522682445757e-05, "loss": 0.7529, "step": 1267, "task_loss": 1.867211103439331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7315213680267334, "epoch": 1.07, "learning_rate": 4.960082652390345e-05, "loss": 0.7458, "step": 1268, "task_loss": 0.6328002214431763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7617927193641663, "epoch": 1.07, "learning_rate": 4.9596130365361136e-05, "loss": 0.7159, "step": 1269, "task_loss": 0.6369990706443787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6732836961746216, "epoch": 1.07, "learning_rate": 4.959143420681883e-05, "loss": 0.7452, "step": 1270, "task_loss": 1.0709483623504639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.485196590423584, "epoch": 1.07, "learning_rate": 4.958673804827651e-05, "loss": 1.0382, "step": 1271, "task_loss": 0.4776339530944824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7095962762832642, "epoch": 1.08, "learning_rate": 4.95820418897342e-05, "loss": 0.6219, "step": 1272, "task_loss": 0.8026034832000732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41314390301704407, "epoch": 1.08, "learning_rate": 4.957734573119189e-05, "loss": 0.6916, "step": 1273, "task_loss": 0.7208462357521057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6063789129257202, "epoch": 1.08, "learning_rate": 4.9572649572649575e-05, "loss": 0.7194, "step": 1274, "task_loss": 1.1068110466003418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.965367317199707, "epoch": 1.08, "learning_rate": 4.956795341410726e-05, "loss": 0.8456, "step": 1275, "task_loss": 1.0645800828933716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.68290776014328, "epoch": 1.08, "learning_rate": 4.956325725556495e-05, "loss": 0.744, "step": 1276, "task_loss": 0.45462653040885925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6011077165603638, "epoch": 1.08, "learning_rate": 4.955856109702264e-05, "loss": 0.8169, "step": 1277, "task_loss": 0.45992499589920044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1188035011291504, "epoch": 1.08, "learning_rate": 4.955386493848033e-05, "loss": 0.9039, "step": 1278, "task_loss": 1.2126431465148926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46042436361312866, "epoch": 1.08, "learning_rate": 4.954916877993801e-05, "loss": 0.6112, "step": 1279, "task_loss": 0.18423138558864594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7442106008529663, "epoch": 1.08, "learning_rate": 4.95444726213957e-05, "loss": 0.6582, "step": 1280, "task_loss": 0.9703310132026672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0708829164505005, "epoch": 1.08, "learning_rate": 4.9539776462853386e-05, "loss": 0.9249, "step": 1281, "task_loss": 1.5777108669281006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.06258225440979, "epoch": 1.08, "learning_rate": 4.953508030431108e-05, "loss": 0.7139, "step": 1282, "task_loss": 1.7329192161560059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6591900587081909, "epoch": 1.08, "learning_rate": 4.9530384145768765e-05, "loss": 0.7632, "step": 1283, "task_loss": 1.119441270828247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6411626935005188, "epoch": 1.09, "learning_rate": 4.952568798722645e-05, "loss": 0.6072, "step": 1284, "task_loss": 0.22331665456295013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5114757418632507, "epoch": 1.09, "learning_rate": 4.952099182868414e-05, "loss": 0.5168, "step": 1285, "task_loss": 0.5522854924201965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6919663548469543, "epoch": 1.09, "learning_rate": 4.9516295670141824e-05, "loss": 0.7647, "step": 1286, "task_loss": 0.45139986276626587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39347317814826965, "epoch": 1.09, "learning_rate": 4.951159951159952e-05, "loss": 0.7198, "step": 1287, "task_loss": 0.07854077219963074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7970384955406189, "epoch": 1.09, "learning_rate": 4.95069033530572e-05, "loss": 0.6829, "step": 1288, "task_loss": 0.7067249417304993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5156869888305664, "epoch": 1.09, "learning_rate": 4.950220719451489e-05, "loss": 0.8368, "step": 1289, "task_loss": 0.031796421855688095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9832377433776855, "epoch": 1.09, "learning_rate": 4.9497511035972576e-05, "loss": 0.8153, "step": 1290, "task_loss": 1.026943564414978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7531895637512207, "epoch": 1.09, "learning_rate": 4.949281487743026e-05, "loss": 0.7911, "step": 1291, "task_loss": 1.3908847570419312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8405012488365173, "epoch": 1.09, "learning_rate": 4.9488118718887956e-05, "loss": 0.883, "step": 1292, "task_loss": 1.2294728755950928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8998849987983704, "epoch": 1.09, "learning_rate": 4.9483422560345635e-05, "loss": 0.7646, "step": 1293, "task_loss": 0.4875016212463379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7589969038963318, "epoch": 1.09, "learning_rate": 4.947872640180333e-05, "loss": 0.5971, "step": 1294, "task_loss": 0.8521745800971985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6620169878005981, "epoch": 1.09, "learning_rate": 4.9474030243261015e-05, "loss": 0.6354, "step": 1295, "task_loss": 0.8096247911453247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7119063138961792, "epoch": 1.1, "learning_rate": 4.946933408471871e-05, "loss": 0.6042, "step": 1296, "task_loss": 0.15211084485054016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5875817537307739, "epoch": 1.1, "learning_rate": 4.946463792617639e-05, "loss": 0.5022, "step": 1297, "task_loss": 0.3339223861694336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5805791616439819, "epoch": 1.1, "learning_rate": 4.9459941767634074e-05, "loss": 0.4584, "step": 1298, "task_loss": 0.5022286176681519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2782948017120361, "epoch": 1.1, "learning_rate": 4.945524560909177e-05, "loss": 0.7876, "step": 1299, "task_loss": 1.3608078956604004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5194815993309021, "epoch": 1.1, "learning_rate": 4.945054945054945e-05, "loss": 0.5846, "step": 1300, "task_loss": 0.20397157967090607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7688887119293213, "epoch": 1.1, "learning_rate": 4.944585329200714e-05, "loss": 0.6906, "step": 1301, "task_loss": 1.317115068435669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7555819153785706, "epoch": 1.1, "learning_rate": 4.9441157133464826e-05, "loss": 0.6985, "step": 1302, "task_loss": 1.0446093082427979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7261559963226318, "epoch": 1.1, "learning_rate": 4.943646097492252e-05, "loss": 0.6837, "step": 1303, "task_loss": 0.7130631804466248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6643291711807251, "epoch": 1.1, "learning_rate": 4.9431764816380205e-05, "loss": 0.7242, "step": 1304, "task_loss": 1.5597457885742188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4893229007720947, "epoch": 1.1, "learning_rate": 4.942706865783789e-05, "loss": 0.8132, "step": 1305, "task_loss": 0.46321481466293335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6365729570388794, "epoch": 1.1, "learning_rate": 4.942237249929558e-05, "loss": 0.6875, "step": 1306, "task_loss": 0.9239042401313782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5990861058235168, "epoch": 1.1, "learning_rate": 4.9417676340753264e-05, "loss": 0.68, "step": 1307, "task_loss": 0.3415476977825165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6146084666252136, "epoch": 1.11, "learning_rate": 4.941298018221096e-05, "loss": 0.7698, "step": 1308, "task_loss": 0.7787371873855591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8386488556861877, "epoch": 1.11, "learning_rate": 4.9408284023668644e-05, "loss": 0.7025, "step": 1309, "task_loss": 0.8743721842765808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6536246538162231, "epoch": 1.11, "learning_rate": 4.940358786512633e-05, "loss": 0.7131, "step": 1310, "task_loss": 1.6918139457702637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.593568742275238, "epoch": 1.11, "learning_rate": 4.9398891706584017e-05, "loss": 0.8105, "step": 1311, "task_loss": 0.5237424969673157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5485741496086121, "epoch": 1.11, "learning_rate": 4.93941955480417e-05, "loss": 0.5862, "step": 1312, "task_loss": 0.20886832475662231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8278984427452087, "epoch": 1.11, "learning_rate": 4.9389499389499396e-05, "loss": 0.7184, "step": 1313, "task_loss": 1.554363489151001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9658547639846802, "epoch": 1.11, "learning_rate": 4.9384803230957076e-05, "loss": 0.7237, "step": 1314, "task_loss": 1.3701603412628174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4553535282611847, "epoch": 1.11, "learning_rate": 4.938010707241477e-05, "loss": 0.5172, "step": 1315, "task_loss": 0.6649882793426514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8820598721504211, "epoch": 1.11, "learning_rate": 4.9375410913872455e-05, "loss": 0.6495, "step": 1316, "task_loss": 0.6993468999862671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6757996678352356, "epoch": 1.11, "learning_rate": 4.937071475533014e-05, "loss": 0.7012, "step": 1317, "task_loss": 0.6599423885345459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6937422752380371, "epoch": 1.11, "learning_rate": 4.936601859678783e-05, "loss": 0.685, "step": 1318, "task_loss": 0.5834644436836243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8636106848716736, "epoch": 1.11, "learning_rate": 4.9361322438245514e-05, "loss": 0.5857, "step": 1319, "task_loss": 0.8539769053459167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4975464642047882, "epoch": 1.12, "learning_rate": 4.935662627970321e-05, "loss": 0.6791, "step": 1320, "task_loss": 0.4714740812778473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9269869327545166, "epoch": 1.12, "learning_rate": 4.9351930121160893e-05, "loss": 0.704, "step": 1321, "task_loss": 0.9611411690711975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.568575918674469, "epoch": 1.12, "learning_rate": 4.934723396261858e-05, "loss": 0.8606, "step": 1322, "task_loss": 2.179511070251465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8018935918807983, "epoch": 1.12, "learning_rate": 4.9342537804076266e-05, "loss": 0.7671, "step": 1323, "task_loss": 0.6106913089752197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8147054314613342, "epoch": 1.12, "learning_rate": 4.933784164553395e-05, "loss": 0.8019, "step": 1324, "task_loss": 0.7852957844734192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8074562549591064, "epoch": 1.12, "learning_rate": 4.9333145486991646e-05, "loss": 0.8694, "step": 1325, "task_loss": 0.4415712356567383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5874978303909302, "epoch": 1.12, "learning_rate": 4.932844932844933e-05, "loss": 0.6374, "step": 1326, "task_loss": 0.5898462533950806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6746093034744263, "epoch": 1.12, "learning_rate": 4.932375316990702e-05, "loss": 0.8523, "step": 1327, "task_loss": 0.6767503023147583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.575567901134491, "epoch": 1.12, "learning_rate": 4.9319057011364705e-05, "loss": 0.5975, "step": 1328, "task_loss": 0.47007158398628235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4073490500450134, "epoch": 1.12, "learning_rate": 4.93143608528224e-05, "loss": 0.8184, "step": 1329, "task_loss": 0.17418037354946136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8520395159721375, "epoch": 1.12, "learning_rate": 4.9309664694280084e-05, "loss": 0.8666, "step": 1330, "task_loss": 1.200476050376892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5398743748664856, "epoch": 1.13, "learning_rate": 4.9304968535737764e-05, "loss": 0.7509, "step": 1331, "task_loss": 1.1194249391555786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6337119340896606, "epoch": 1.13, "learning_rate": 4.930027237719546e-05, "loss": 0.8796, "step": 1332, "task_loss": 0.7575408220291138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4439694285392761, "epoch": 1.13, "learning_rate": 4.929557621865314e-05, "loss": 0.7545, "step": 1333, "task_loss": 0.4638672173023224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40982136130332947, "epoch": 1.13, "learning_rate": 4.9290880060110836e-05, "loss": 0.6296, "step": 1334, "task_loss": 0.7350741028785706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8842246532440186, "epoch": 1.13, "learning_rate": 4.9286183901568516e-05, "loss": 0.8604, "step": 1335, "task_loss": 1.3499548435211182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7328392267227173, "epoch": 1.13, "learning_rate": 4.928148774302621e-05, "loss": 0.9084, "step": 1336, "task_loss": 1.5237025022506714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.486014723777771, "epoch": 1.13, "learning_rate": 4.9276791584483895e-05, "loss": 0.8911, "step": 1337, "task_loss": 0.6356886625289917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8739113807678223, "epoch": 1.13, "learning_rate": 4.927209542594158e-05, "loss": 0.7797, "step": 1338, "task_loss": 1.658339500427246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6953275799751282, "epoch": 1.13, "learning_rate": 4.9267399267399275e-05, "loss": 0.5765, "step": 1339, "task_loss": 1.1478419303894043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9363539218902588, "epoch": 1.13, "learning_rate": 4.9262703108856954e-05, "loss": 0.714, "step": 1340, "task_loss": 1.7656033039093018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6214877367019653, "epoch": 1.13, "learning_rate": 4.925800695031465e-05, "loss": 0.7937, "step": 1341, "task_loss": 0.33618035912513733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36483001708984375, "epoch": 1.13, "learning_rate": 4.9253310791772334e-05, "loss": 0.8045, "step": 1342, "task_loss": 0.32565248012542725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4432213306427002, "epoch": 1.14, "learning_rate": 4.924861463323002e-05, "loss": 0.6071, "step": 1343, "task_loss": 0.1133461445569992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2358568906784058, "epoch": 1.14, "learning_rate": 4.9243918474687706e-05, "loss": 0.8662, "step": 1344, "task_loss": 1.3504589796066284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3721703290939331, "epoch": 1.14, "learning_rate": 4.923922231614539e-05, "loss": 0.6102, "step": 1345, "task_loss": 0.649557888507843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8590005040168762, "epoch": 1.14, "learning_rate": 4.9234526157603086e-05, "loss": 0.8042, "step": 1346, "task_loss": 1.2778964042663574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6432254910469055, "epoch": 1.14, "learning_rate": 4.922982999906077e-05, "loss": 0.7079, "step": 1347, "task_loss": 0.431815505027771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5267866849899292, "epoch": 1.14, "learning_rate": 4.922513384051846e-05, "loss": 0.7442, "step": 1348, "task_loss": 1.072770357131958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3082277774810791, "epoch": 1.14, "learning_rate": 4.9220437681976145e-05, "loss": 0.5998, "step": 1349, "task_loss": 0.6159541606903076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5374014377593994, "epoch": 1.14, "learning_rate": 4.921574152343383e-05, "loss": 0.5618, "step": 1350, "task_loss": 1.7330533266067505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.497970849275589, "epoch": 1.14, "learning_rate": 4.9211045364891524e-05, "loss": 0.7302, "step": 1351, "task_loss": 0.9183330535888672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6379715204238892, "epoch": 1.14, "learning_rate": 4.9206349206349204e-05, "loss": 0.7997, "step": 1352, "task_loss": 1.020643949508667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9639503955841064, "epoch": 1.14, "learning_rate": 4.92016530478069e-05, "loss": 0.7931, "step": 1353, "task_loss": 0.5578446984291077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39265215396881104, "epoch": 1.14, "learning_rate": 4.919695688926458e-05, "loss": 0.5905, "step": 1354, "task_loss": 0.6331140398979187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4268333911895752, "epoch": 1.15, "learning_rate": 4.919226073072227e-05, "loss": 0.6924, "step": 1355, "task_loss": 0.4481344521045685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4223642349243164, "epoch": 1.15, "learning_rate": 4.918756457217996e-05, "loss": 1.0326, "step": 1356, "task_loss": 0.3113849461078644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5445670485496521, "epoch": 1.15, "learning_rate": 4.918286841363764e-05, "loss": 0.8277, "step": 1357, "task_loss": 0.3801783323287964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8625892400741577, "epoch": 1.15, "learning_rate": 4.9178172255095335e-05, "loss": 0.8843, "step": 1358, "task_loss": 1.6925551891326904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7548710107803345, "epoch": 1.15, "learning_rate": 4.917347609655302e-05, "loss": 0.6796, "step": 1359, "task_loss": 0.15920209884643555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.846359372138977, "epoch": 1.15, "learning_rate": 4.9168779938010715e-05, "loss": 0.7868, "step": 1360, "task_loss": 0.5818783044815063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7499348521232605, "epoch": 1.15, "learning_rate": 4.9164083779468394e-05, "loss": 0.832, "step": 1361, "task_loss": 0.7431564927101135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4212087094783783, "epoch": 1.15, "learning_rate": 4.915938762092608e-05, "loss": 0.6842, "step": 1362, "task_loss": 0.5436190366744995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4444526135921478, "epoch": 1.15, "learning_rate": 4.9154691462383774e-05, "loss": 0.8394, "step": 1363, "task_loss": 0.6949467658996582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36300021409988403, "epoch": 1.15, "learning_rate": 4.914999530384146e-05, "loss": 0.6119, "step": 1364, "task_loss": 0.566525399684906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48846232891082764, "epoch": 1.15, "learning_rate": 4.9145299145299147e-05, "loss": 0.5773, "step": 1365, "task_loss": 1.2703462839126587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0408544540405273, "epoch": 1.15, "learning_rate": 4.914060298675683e-05, "loss": 0.7771, "step": 1366, "task_loss": 1.2485730648040771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4683907926082611, "epoch": 1.16, "learning_rate": 4.9135906828214526e-05, "loss": 0.5896, "step": 1367, "task_loss": 0.45093268156051636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9782313108444214, "epoch": 1.16, "learning_rate": 4.913121066967221e-05, "loss": 0.7664, "step": 1368, "task_loss": 0.9083707332611084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39896976947784424, "epoch": 1.16, "learning_rate": 4.91265145111299e-05, "loss": 0.898, "step": 1369, "task_loss": 0.6530686616897583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5919918417930603, "epoch": 1.16, "learning_rate": 4.9121818352587585e-05, "loss": 0.4871, "step": 1370, "task_loss": 0.8674747347831726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.632807731628418, "epoch": 1.16, "learning_rate": 4.911712219404527e-05, "loss": 0.5477, "step": 1371, "task_loss": 1.3166711330413818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4047320783138275, "epoch": 1.16, "learning_rate": 4.9112426035502965e-05, "loss": 0.5886, "step": 1372, "task_loss": 0.48294907808303833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9462391138076782, "epoch": 1.16, "learning_rate": 4.910772987696065e-05, "loss": 0.8627, "step": 1373, "task_loss": 1.0339065790176392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7465444803237915, "epoch": 1.16, "learning_rate": 4.910303371841834e-05, "loss": 0.7753, "step": 1374, "task_loss": 1.4320716857910156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.653499960899353, "epoch": 1.16, "learning_rate": 4.9098337559876024e-05, "loss": 0.7818, "step": 1375, "task_loss": 1.2565425634384155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8790438771247864, "epoch": 1.16, "learning_rate": 4.909364140133371e-05, "loss": 0.7579, "step": 1376, "task_loss": 0.8934701085090637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8380473256111145, "epoch": 1.16, "learning_rate": 4.90889452427914e-05, "loss": 0.7135, "step": 1377, "task_loss": 1.08989417552948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7631471157073975, "epoch": 1.16, "learning_rate": 4.908424908424908e-05, "loss": 0.8222, "step": 1378, "task_loss": 0.2712436616420746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6080143451690674, "epoch": 1.17, "learning_rate": 4.9079552925706776e-05, "loss": 0.7102, "step": 1379, "task_loss": 0.8942501544952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6495489478111267, "epoch": 1.17, "learning_rate": 4.907485676716446e-05, "loss": 0.6702, "step": 1380, "task_loss": 0.6311919689178467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9071307182312012, "epoch": 1.17, "learning_rate": 4.907016060862215e-05, "loss": 0.7009, "step": 1381, "task_loss": 1.4740062952041626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9023734331130981, "epoch": 1.17, "learning_rate": 4.9065464450079835e-05, "loss": 0.6568, "step": 1382, "task_loss": 0.8073087930679321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7801535129547119, "epoch": 1.17, "learning_rate": 4.906076829153752e-05, "loss": 0.6868, "step": 1383, "task_loss": 2.6489248275756836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9147593379020691, "epoch": 1.17, "learning_rate": 4.9056072132995214e-05, "loss": 0.6275, "step": 1384, "task_loss": 0.6405540704727173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9946785569190979, "epoch": 1.17, "learning_rate": 4.90513759744529e-05, "loss": 0.6814, "step": 1385, "task_loss": 1.0420730113983154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7333429455757141, "epoch": 1.17, "learning_rate": 4.904667981591059e-05, "loss": 0.6264, "step": 1386, "task_loss": 0.7843606472015381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33305492997169495, "epoch": 1.17, "learning_rate": 4.904198365736827e-05, "loss": 0.6538, "step": 1387, "task_loss": 0.21004749834537506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6137186884880066, "epoch": 1.17, "learning_rate": 4.903728749882596e-05, "loss": 0.6505, "step": 1388, "task_loss": 0.5288998484611511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5535995960235596, "epoch": 1.17, "learning_rate": 4.903259134028365e-05, "loss": 0.6088, "step": 1389, "task_loss": 1.2242867946624756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7867120504379272, "epoch": 1.17, "learning_rate": 4.902789518174134e-05, "loss": 0.8034, "step": 1390, "task_loss": 0.6673921346664429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5648953914642334, "epoch": 1.18, "learning_rate": 4.9023199023199025e-05, "loss": 0.7146, "step": 1391, "task_loss": 0.677344024181366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.958598792552948, "epoch": 1.18, "learning_rate": 4.901850286465671e-05, "loss": 0.8642, "step": 1392, "task_loss": 0.44628605246543884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5469300746917725, "epoch": 1.18, "learning_rate": 4.90138067061144e-05, "loss": 0.7924, "step": 1393, "task_loss": 0.9526567459106445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7930124402046204, "epoch": 1.18, "learning_rate": 4.900911054757209e-05, "loss": 0.7569, "step": 1394, "task_loss": 1.274341344833374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3054381608963013, "epoch": 1.18, "learning_rate": 4.900441438902977e-05, "loss": 0.7677, "step": 1395, "task_loss": 0.6572279930114746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9562807083129883, "epoch": 1.18, "learning_rate": 4.8999718230487464e-05, "loss": 0.8534, "step": 1396, "task_loss": 1.3699431419372559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.605976402759552, "epoch": 1.18, "learning_rate": 4.899502207194515e-05, "loss": 0.6895, "step": 1397, "task_loss": 1.34196937084198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7721555829048157, "epoch": 1.18, "learning_rate": 4.899032591340284e-05, "loss": 0.768, "step": 1398, "task_loss": 0.4691799283027649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6506397724151611, "epoch": 1.18, "learning_rate": 4.898562975486053e-05, "loss": 0.614, "step": 1399, "task_loss": 1.6939071416854858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8317692875862122, "epoch": 1.18, "learning_rate": 4.898093359631821e-05, "loss": 0.5782, "step": 1400, "task_loss": 0.9152218699455261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8424049019813538, "epoch": 1.18, "learning_rate": 4.89762374377759e-05, "loss": 0.8074, "step": 1401, "task_loss": 1.3732789754867554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8976304531097412, "epoch": 1.19, "learning_rate": 4.897154127923359e-05, "loss": 0.6254, "step": 1402, "task_loss": 1.0989890098571777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6012596487998962, "epoch": 1.19, "learning_rate": 4.896684512069128e-05, "loss": 0.4635, "step": 1403, "task_loss": 2.0580663681030273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5414929389953613, "epoch": 1.19, "learning_rate": 4.896214896214896e-05, "loss": 0.5749, "step": 1404, "task_loss": 0.8120653033256531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4800635576248169, "epoch": 1.19, "learning_rate": 4.8957452803606654e-05, "loss": 0.6734, "step": 1405, "task_loss": 0.9081025719642639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6461237668991089, "epoch": 1.19, "learning_rate": 4.895275664506434e-05, "loss": 0.6705, "step": 1406, "task_loss": 1.3749754428863525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7342914938926697, "epoch": 1.19, "learning_rate": 4.894806048652203e-05, "loss": 0.9409, "step": 1407, "task_loss": 1.5672852993011475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6416138410568237, "epoch": 1.19, "learning_rate": 4.8943364327979713e-05, "loss": 0.6717, "step": 1408, "task_loss": 1.0396579504013062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6857807040214539, "epoch": 1.19, "learning_rate": 4.89386681694374e-05, "loss": 0.7285, "step": 1409, "task_loss": 1.169476866722107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7734683156013489, "epoch": 1.19, "learning_rate": 4.893397201089509e-05, "loss": 0.7427, "step": 1410, "task_loss": 1.4853997230529785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.180363416671753, "epoch": 1.19, "learning_rate": 4.892927585235278e-05, "loss": 0.9564, "step": 1411, "task_loss": 1.29390549659729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46620845794677734, "epoch": 1.19, "learning_rate": 4.8924579693810466e-05, "loss": 0.7059, "step": 1412, "task_loss": 0.9661598205566406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8473305702209473, "epoch": 1.19, "learning_rate": 4.891988353526815e-05, "loss": 0.5465, "step": 1413, "task_loss": 0.8640350103378296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5016604661941528, "epoch": 1.2, "learning_rate": 4.891518737672584e-05, "loss": 0.5464, "step": 1414, "task_loss": 1.0885443687438965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8723794221878052, "epoch": 1.2, "learning_rate": 4.891049121818353e-05, "loss": 0.8293, "step": 1415, "task_loss": 1.7057725191116333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5939933657646179, "epoch": 1.2, "learning_rate": 4.890579505964122e-05, "loss": 0.7045, "step": 1416, "task_loss": 0.9896551966667175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8737054467201233, "epoch": 1.2, "learning_rate": 4.8901098901098904e-05, "loss": 0.8535, "step": 1417, "task_loss": 1.3610942363739014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.615134596824646, "epoch": 1.2, "learning_rate": 4.889640274255659e-05, "loss": 0.7214, "step": 1418, "task_loss": 1.0902196168899536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4507373571395874, "epoch": 1.2, "learning_rate": 4.889170658401428e-05, "loss": 0.6735, "step": 1419, "task_loss": 0.843724250793457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45953434705734253, "epoch": 1.2, "learning_rate": 4.888701042547197e-05, "loss": 0.693, "step": 1420, "task_loss": 0.23189134895801544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40092235803604126, "epoch": 1.2, "learning_rate": 4.888231426692965e-05, "loss": 0.5416, "step": 1421, "task_loss": 0.10509082674980164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1163768768310547, "epoch": 1.2, "learning_rate": 4.887761810838734e-05, "loss": 0.8254, "step": 1422, "task_loss": 1.9047534465789795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34128084778785706, "epoch": 1.2, "learning_rate": 4.887292194984503e-05, "loss": 0.5723, "step": 1423, "task_loss": 0.6849923133850098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7540886402130127, "epoch": 1.2, "learning_rate": 4.886822579130272e-05, "loss": 0.6781, "step": 1424, "task_loss": 1.2089835405349731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8466973304748535, "epoch": 1.2, "learning_rate": 4.88635296327604e-05, "loss": 0.8919, "step": 1425, "task_loss": 1.097320556640625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48765939474105835, "epoch": 1.21, "learning_rate": 4.885883347421809e-05, "loss": 0.491, "step": 1426, "task_loss": 1.0128005743026733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9243703484535217, "epoch": 1.21, "learning_rate": 4.885413731567578e-05, "loss": 0.8056, "step": 1427, "task_loss": 1.4640958309173584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.927590012550354, "epoch": 1.21, "learning_rate": 4.884944115713347e-05, "loss": 0.9327, "step": 1428, "task_loss": 1.7155274152755737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8107530474662781, "epoch": 1.21, "learning_rate": 4.8844744998591154e-05, "loss": 0.7344, "step": 1429, "task_loss": 1.0139280557632446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6632162928581238, "epoch": 1.21, "learning_rate": 4.884004884004884e-05, "loss": 0.6142, "step": 1430, "task_loss": 0.7040442824363708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6531332731246948, "epoch": 1.21, "learning_rate": 4.883535268150653e-05, "loss": 0.5637, "step": 1431, "task_loss": 0.8643351197242737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2865351438522339, "epoch": 1.21, "learning_rate": 4.883065652296422e-05, "loss": 0.8943, "step": 1432, "task_loss": 1.7488481998443604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3825588822364807, "epoch": 1.21, "learning_rate": 4.8825960364421906e-05, "loss": 0.5443, "step": 1433, "task_loss": 0.12511196732521057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3950383961200714, "epoch": 1.21, "learning_rate": 4.882126420587959e-05, "loss": 0.6545, "step": 1434, "task_loss": 0.6410995125770569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4567745327949524, "epoch": 1.21, "learning_rate": 4.881656804733728e-05, "loss": 0.5903, "step": 1435, "task_loss": 0.3002862334251404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9099655747413635, "epoch": 1.21, "learning_rate": 4.881187188879497e-05, "loss": 0.6455, "step": 1436, "task_loss": 0.974285900592804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6794091463088989, "epoch": 1.21, "learning_rate": 4.880717573025266e-05, "loss": 0.6036, "step": 1437, "task_loss": 0.6677008867263794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7506954669952393, "epoch": 1.22, "learning_rate": 4.8802479571710344e-05, "loss": 0.6353, "step": 1438, "task_loss": 0.5875498652458191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3806507587432861, "epoch": 1.22, "learning_rate": 4.879778341316803e-05, "loss": 0.8748, "step": 1439, "task_loss": 1.5451762676239014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4106060862541199, "epoch": 1.22, "learning_rate": 4.879308725462572e-05, "loss": 0.5966, "step": 1440, "task_loss": 0.24030889570713043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8642206192016602, "epoch": 1.22, "learning_rate": 4.878839109608341e-05, "loss": 0.8445, "step": 1441, "task_loss": 0.6205974817276001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5707499384880066, "epoch": 1.22, "learning_rate": 4.878369493754109e-05, "loss": 0.5556, "step": 1442, "task_loss": 0.8413172960281372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4867216944694519, "epoch": 1.22, "learning_rate": 4.877899877899878e-05, "loss": 0.7536, "step": 1443, "task_loss": 0.7452800869941711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.438819020986557, "epoch": 1.22, "learning_rate": 4.877430262045647e-05, "loss": 0.5027, "step": 1444, "task_loss": 0.21872316300868988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7449390292167664, "epoch": 1.22, "learning_rate": 4.8769606461914155e-05, "loss": 0.7909, "step": 1445, "task_loss": 0.31885644793510437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2754833698272705, "epoch": 1.22, "learning_rate": 4.876491030337185e-05, "loss": 0.7787, "step": 1446, "task_loss": 1.1766369342803955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1663405895233154, "epoch": 1.22, "learning_rate": 4.876021414482953e-05, "loss": 0.9049, "step": 1447, "task_loss": 1.3685373067855835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8504725694656372, "epoch": 1.22, "learning_rate": 4.875551798628722e-05, "loss": 0.5663, "step": 1448, "task_loss": 0.6640506386756897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5474025011062622, "epoch": 1.22, "learning_rate": 4.875082182774491e-05, "loss": 0.5725, "step": 1449, "task_loss": 0.43533891439437866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8537062406539917, "epoch": 1.23, "learning_rate": 4.8746125669202594e-05, "loss": 0.8167, "step": 1450, "task_loss": 1.455654501914978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6113744974136353, "epoch": 1.23, "learning_rate": 4.874142951066028e-05, "loss": 0.713, "step": 1451, "task_loss": 0.8146095275878906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.169542670249939, "epoch": 1.23, "learning_rate": 4.8736733352117967e-05, "loss": 0.7603, "step": 1452, "task_loss": 2.2094736099243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5497122406959534, "epoch": 1.23, "learning_rate": 4.873203719357566e-05, "loss": 0.5944, "step": 1453, "task_loss": 0.392924964427948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46350717544555664, "epoch": 1.23, "learning_rate": 4.8727341035033346e-05, "loss": 0.608, "step": 1454, "task_loss": 0.8149264454841614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6392902135848999, "epoch": 1.23, "learning_rate": 4.872264487649103e-05, "loss": 0.5186, "step": 1455, "task_loss": 0.9574739336967468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5297787189483643, "epoch": 1.23, "learning_rate": 4.871794871794872e-05, "loss": 0.8129, "step": 1456, "task_loss": 0.15154540538787842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5194440484046936, "epoch": 1.23, "learning_rate": 4.8713252559406405e-05, "loss": 0.7873, "step": 1457, "task_loss": 0.3596600592136383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6684011220932007, "epoch": 1.23, "learning_rate": 4.87085564008641e-05, "loss": 0.784, "step": 1458, "task_loss": 1.0689395666122437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7673236131668091, "epoch": 1.23, "learning_rate": 4.870386024232178e-05, "loss": 0.7741, "step": 1459, "task_loss": 0.8773324489593506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.747677206993103, "epoch": 1.23, "learning_rate": 4.869916408377947e-05, "loss": 0.7212, "step": 1460, "task_loss": 2.2411882877349854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8411785960197449, "epoch": 1.23, "learning_rate": 4.869446792523716e-05, "loss": 0.7386, "step": 1461, "task_loss": 1.0019055604934692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6762418150901794, "epoch": 1.24, "learning_rate": 4.868977176669485e-05, "loss": 0.7679, "step": 1462, "task_loss": 0.5109889507293701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6998376846313477, "epoch": 1.24, "learning_rate": 4.868507560815254e-05, "loss": 0.7607, "step": 1463, "task_loss": 0.6151875257492065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5083563327789307, "epoch": 1.24, "learning_rate": 4.8680379449610216e-05, "loss": 0.7098, "step": 1464, "task_loss": 1.4406720399856567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7890558242797852, "epoch": 1.24, "learning_rate": 4.867568329106791e-05, "loss": 0.7611, "step": 1465, "task_loss": 0.9617787003517151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6055943369865417, "epoch": 1.24, "learning_rate": 4.8670987132525596e-05, "loss": 0.6059, "step": 1466, "task_loss": 0.6770760416984558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37366679310798645, "epoch": 1.24, "learning_rate": 4.866629097398329e-05, "loss": 0.5127, "step": 1467, "task_loss": 0.43487149477005005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7518611550331116, "epoch": 1.24, "learning_rate": 4.866159481544097e-05, "loss": 0.7332, "step": 1468, "task_loss": 1.145060420036316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.293322205543518, "epoch": 1.24, "learning_rate": 4.865689865689866e-05, "loss": 0.6716, "step": 1469, "task_loss": 1.5360387563705444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4704620838165283, "epoch": 1.24, "learning_rate": 4.865220249835635e-05, "loss": 0.7537, "step": 1470, "task_loss": 0.5821393728256226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6113060116767883, "epoch": 1.24, "learning_rate": 4.8647506339814034e-05, "loss": 0.5516, "step": 1471, "task_loss": 1.1429622173309326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6013695001602173, "epoch": 1.24, "learning_rate": 4.864281018127172e-05, "loss": 0.6482, "step": 1472, "task_loss": 0.48303598165512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4717157483100891, "epoch": 1.24, "learning_rate": 4.863811402272941e-05, "loss": 0.4949, "step": 1473, "task_loss": 0.44477030634880066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5871829986572266, "epoch": 1.25, "learning_rate": 4.86334178641871e-05, "loss": 0.7022, "step": 1474, "task_loss": 0.5484627485275269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4277307689189911, "epoch": 1.25, "learning_rate": 4.8628721705644786e-05, "loss": 0.5228, "step": 1475, "task_loss": 0.9223886132240295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6287857294082642, "epoch": 1.25, "learning_rate": 4.862402554710247e-05, "loss": 0.5617, "step": 1476, "task_loss": 1.3650906085968018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5969974398612976, "epoch": 1.25, "learning_rate": 4.861932938856016e-05, "loss": 0.7971, "step": 1477, "task_loss": 0.6549233794212341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4627194404602051, "epoch": 1.25, "learning_rate": 4.8614633230017845e-05, "loss": 0.5524, "step": 1478, "task_loss": 0.8653776049613953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5009497404098511, "epoch": 1.25, "learning_rate": 4.860993707147554e-05, "loss": 0.5393, "step": 1479, "task_loss": 0.6862202882766724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3924330472946167, "epoch": 1.25, "learning_rate": 4.8605240912933225e-05, "loss": 0.5153, "step": 1480, "task_loss": 0.8409653306007385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8244216442108154, "epoch": 1.25, "learning_rate": 4.860054475439091e-05, "loss": 0.5856, "step": 1481, "task_loss": 1.0686450004577637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46737462282180786, "epoch": 1.25, "learning_rate": 4.85958485958486e-05, "loss": 0.5875, "step": 1482, "task_loss": 1.671823501586914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5929138660430908, "epoch": 1.25, "learning_rate": 4.8591152437306284e-05, "loss": 0.8679, "step": 1483, "task_loss": 1.842167615890503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6020407676696777, "epoch": 1.25, "learning_rate": 4.858645627876398e-05, "loss": 0.6542, "step": 1484, "task_loss": 0.38895007967948914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6603551506996155, "epoch": 1.26, "learning_rate": 4.8581760120221656e-05, "loss": 1.0281, "step": 1485, "task_loss": 0.7223086953163147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5581532120704651, "epoch": 1.26, "learning_rate": 4.857706396167935e-05, "loss": 0.5386, "step": 1486, "task_loss": 0.8768975734710693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40406912565231323, "epoch": 1.26, "learning_rate": 4.8572367803137036e-05, "loss": 0.5613, "step": 1487, "task_loss": 0.5109216570854187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8604691624641418, "epoch": 1.26, "learning_rate": 4.856767164459472e-05, "loss": 0.6734, "step": 1488, "task_loss": 0.8521256446838379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47607240080833435, "epoch": 1.26, "learning_rate": 4.856297548605241e-05, "loss": 0.5991, "step": 1489, "task_loss": 0.6997964382171631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5973621010780334, "epoch": 1.26, "learning_rate": 4.8558279327510095e-05, "loss": 0.6247, "step": 1490, "task_loss": 0.4725404977798462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6064885258674622, "epoch": 1.26, "learning_rate": 4.855358316896779e-05, "loss": 0.6619, "step": 1491, "task_loss": 0.6820220351219177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4797917306423187, "epoch": 1.26, "learning_rate": 4.8548887010425474e-05, "loss": 0.458, "step": 1492, "task_loss": 1.1840749979019165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8786188364028931, "epoch": 1.26, "learning_rate": 4.854419085188317e-05, "loss": 0.6007, "step": 1493, "task_loss": 1.5696059465408325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8445448875427246, "epoch": 1.26, "learning_rate": 4.853949469334085e-05, "loss": 0.8566, "step": 1494, "task_loss": 0.3826449513435364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.508175790309906, "epoch": 1.26, "learning_rate": 4.8534798534798533e-05, "loss": 0.6399, "step": 1495, "task_loss": 1.032458782196045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5691758990287781, "epoch": 1.26, "learning_rate": 4.8530102376256227e-05, "loss": 0.5074, "step": 1496, "task_loss": 0.5862833857536316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8085304498672485, "epoch": 1.27, "learning_rate": 4.852540621771391e-05, "loss": 0.6452, "step": 1497, "task_loss": 0.573459804058075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5299599766731262, "epoch": 1.27, "learning_rate": 4.85207100591716e-05, "loss": 0.5753, "step": 1498, "task_loss": 0.72123122215271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6362002491950989, "epoch": 1.27, "learning_rate": 4.8516013900629286e-05, "loss": 0.6339, "step": 1499, "task_loss": 1.2735553979873657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9239736795425415, "epoch": 1.27, "learning_rate": 4.851131774208698e-05, "loss": 0.7536, "step": 1500, "task_loss": 1.9095009565353394 }, { "epoch": 1.27, "eval_accuracy": 0.8983762376237624, "eval_loss": 0.384400337934494, "eval_runtime": 225.5502, "eval_samples_per_second": 111.948, "eval_steps_per_second": 0.878, "step": 1500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4541608691215515, "epoch": 1.27, "learning_rate": 4.8506621583544665e-05, "loss": 0.7779, "step": 1501, "task_loss": 1.2713098526000977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6834964156150818, "epoch": 1.27, "learning_rate": 4.850192542500235e-05, "loss": 0.6977, "step": 1502, "task_loss": 0.5897018313407898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3659794330596924, "epoch": 1.27, "learning_rate": 4.849722926646004e-05, "loss": 0.4436, "step": 1503, "task_loss": 0.9696637392044067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5866661071777344, "epoch": 1.27, "learning_rate": 4.8492533107917724e-05, "loss": 0.7411, "step": 1504, "task_loss": 0.6079413294792175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.993211030960083, "epoch": 1.27, "learning_rate": 4.848783694937542e-05, "loss": 0.7243, "step": 1505, "task_loss": 0.650210440158844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7521438598632812, "epoch": 1.27, "learning_rate": 4.84831407908331e-05, "loss": 0.7333, "step": 1506, "task_loss": 1.0649361610412598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44461825489997864, "epoch": 1.27, "learning_rate": 4.847844463229079e-05, "loss": 0.5687, "step": 1507, "task_loss": 0.5807681083679199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0357040166854858, "epoch": 1.27, "learning_rate": 4.8473748473748476e-05, "loss": 0.7464, "step": 1508, "task_loss": 0.9631022810935974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9178816676139832, "epoch": 1.28, "learning_rate": 4.846905231520616e-05, "loss": 0.7092, "step": 1509, "task_loss": 0.3072935938835144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5416196584701538, "epoch": 1.28, "learning_rate": 4.8464356156663856e-05, "loss": 0.5947, "step": 1510, "task_loss": 1.4290881156921387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5554755926132202, "epoch": 1.28, "learning_rate": 4.8459659998121535e-05, "loss": 0.645, "step": 1511, "task_loss": 0.6330921649932861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0555421113967896, "epoch": 1.28, "learning_rate": 4.845496383957923e-05, "loss": 0.6355, "step": 1512, "task_loss": 1.083510398864746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7530368566513062, "epoch": 1.28, "learning_rate": 4.8450267681036915e-05, "loss": 0.5645, "step": 1513, "task_loss": 0.4762268364429474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7671228647232056, "epoch": 1.28, "learning_rate": 4.84455715224946e-05, "loss": 0.7296, "step": 1514, "task_loss": 1.7188999652862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3586541712284088, "epoch": 1.28, "learning_rate": 4.844087536395229e-05, "loss": 0.7537, "step": 1515, "task_loss": 0.0351874902844429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6425732374191284, "epoch": 1.28, "learning_rate": 4.8436179205409974e-05, "loss": 0.6508, "step": 1516, "task_loss": 0.8825625777244568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3740810751914978, "epoch": 1.28, "learning_rate": 4.843148304686767e-05, "loss": 0.7239, "step": 1517, "task_loss": 1.384949803352356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44700026512145996, "epoch": 1.28, "learning_rate": 4.842678688832535e-05, "loss": 0.7097, "step": 1518, "task_loss": 0.6316410303115845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5783646106719971, "epoch": 1.28, "learning_rate": 4.842209072978304e-05, "loss": 0.7064, "step": 1519, "task_loss": 0.16291916370391846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8869997262954712, "epoch": 1.28, "learning_rate": 4.8417394571240726e-05, "loss": 0.7833, "step": 1520, "task_loss": 1.9359475374221802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5382350087165833, "epoch": 1.29, "learning_rate": 4.841269841269841e-05, "loss": 0.6654, "step": 1521, "task_loss": 1.3381412029266357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8865202069282532, "epoch": 1.29, "learning_rate": 4.8408002254156105e-05, "loss": 0.6251, "step": 1522, "task_loss": 1.247222900390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42301034927368164, "epoch": 1.29, "learning_rate": 4.840330609561379e-05, "loss": 0.5963, "step": 1523, "task_loss": 0.09978712350130081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7938084006309509, "epoch": 1.29, "learning_rate": 4.839860993707148e-05, "loss": 0.6773, "step": 1524, "task_loss": 0.7873339056968689 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8397322297096252, "epoch": 1.29, "learning_rate": 4.8393913778529164e-05, "loss": 0.7738, "step": 1525, "task_loss": 1.5768003463745117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3896780014038086, "epoch": 1.29, "learning_rate": 4.838921761998686e-05, "loss": 0.5041, "step": 1526, "task_loss": 0.05013284087181091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7383967041969299, "epoch": 1.29, "learning_rate": 4.8384521461444544e-05, "loss": 0.7123, "step": 1527, "task_loss": 0.9478172659873962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5221855640411377, "epoch": 1.29, "learning_rate": 4.837982530290222e-05, "loss": 0.6003, "step": 1528, "task_loss": 0.2976978123188019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6729661226272583, "epoch": 1.29, "learning_rate": 4.8375129144359916e-05, "loss": 0.9099, "step": 1529, "task_loss": 0.8623117208480835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4779791235923767, "epoch": 1.29, "learning_rate": 4.83704329858176e-05, "loss": 0.6113, "step": 1530, "task_loss": 0.5173561573028564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5177722573280334, "epoch": 1.29, "learning_rate": 4.8365736827275296e-05, "loss": 0.6806, "step": 1531, "task_loss": 0.5905522108078003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5924851894378662, "epoch": 1.29, "learning_rate": 4.8361040668732975e-05, "loss": 0.4781, "step": 1532, "task_loss": 1.018708348274231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5291149616241455, "epoch": 1.3, "learning_rate": 4.835634451019067e-05, "loss": 0.556, "step": 1533, "task_loss": 0.804455578327179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8562993407249451, "epoch": 1.3, "learning_rate": 4.8351648351648355e-05, "loss": 0.6658, "step": 1534, "task_loss": 1.2366138696670532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42133986949920654, "epoch": 1.3, "learning_rate": 4.834695219310604e-05, "loss": 0.6269, "step": 1535, "task_loss": 0.5058164000511169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8551740646362305, "epoch": 1.3, "learning_rate": 4.834225603456373e-05, "loss": 0.7505, "step": 1536, "task_loss": 0.5807523727416992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6683972477912903, "epoch": 1.3, "learning_rate": 4.8337559876021414e-05, "loss": 0.5743, "step": 1537, "task_loss": 1.1487483978271484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8252105712890625, "epoch": 1.3, "learning_rate": 4.833286371747911e-05, "loss": 0.5636, "step": 1538, "task_loss": 0.46509143710136414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.467268168926239, "epoch": 1.3, "learning_rate": 4.832816755893679e-05, "loss": 0.6713, "step": 1539, "task_loss": 0.44934970140457153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.218357801437378, "epoch": 1.3, "learning_rate": 4.832347140039448e-05, "loss": 0.7939, "step": 1540, "task_loss": 1.5335711240768433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4192107617855072, "epoch": 1.3, "learning_rate": 4.8318775241852166e-05, "loss": 0.4678, "step": 1541, "task_loss": 0.25379788875579834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8147869110107422, "epoch": 1.3, "learning_rate": 4.831407908330985e-05, "loss": 0.7316, "step": 1542, "task_loss": 1.2931303977966309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4654761850833893, "epoch": 1.3, "learning_rate": 4.8309382924767545e-05, "loss": 0.7304, "step": 1543, "task_loss": 0.6167084574699402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9480953216552734, "epoch": 1.3, "learning_rate": 4.830468676622523e-05, "loss": 0.7739, "step": 1544, "task_loss": 2.199705123901367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6186559200286865, "epoch": 1.31, "learning_rate": 4.829999060768292e-05, "loss": 0.5792, "step": 1545, "task_loss": 0.7755299210548401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.438176691532135, "epoch": 1.31, "learning_rate": 4.8295294449140604e-05, "loss": 0.5201, "step": 1546, "task_loss": 0.4548749327659607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.529374361038208, "epoch": 1.31, "learning_rate": 4.829059829059829e-05, "loss": 0.6641, "step": 1547, "task_loss": 0.3805043697357178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6572398543357849, "epoch": 1.31, "learning_rate": 4.8285902132055984e-05, "loss": 0.5917, "step": 1548, "task_loss": 1.7817422151565552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7891438007354736, "epoch": 1.31, "learning_rate": 4.8281205973513664e-05, "loss": 0.665, "step": 1549, "task_loss": 0.8317681550979614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9146137237548828, "epoch": 1.31, "learning_rate": 4.827650981497136e-05, "loss": 0.7327, "step": 1550, "task_loss": 0.7852668166160583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35905563831329346, "epoch": 1.31, "learning_rate": 4.827181365642904e-05, "loss": 0.5571, "step": 1551, "task_loss": 0.4135622978210449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6953215003013611, "epoch": 1.31, "learning_rate": 4.826711749788673e-05, "loss": 0.7756, "step": 1552, "task_loss": 0.7486950159072876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5961663126945496, "epoch": 1.31, "learning_rate": 4.826242133934442e-05, "loss": 0.6144, "step": 1553, "task_loss": 0.9695446491241455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5856481790542603, "epoch": 1.31, "learning_rate": 4.82577251808021e-05, "loss": 0.6533, "step": 1554, "task_loss": 0.8537415266036987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5874646306037903, "epoch": 1.31, "learning_rate": 4.8253029022259795e-05, "loss": 0.7159, "step": 1555, "task_loss": 0.7377356886863708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6491216421127319, "epoch": 1.32, "learning_rate": 4.824833286371748e-05, "loss": 0.7145, "step": 1556, "task_loss": 0.9537845849990845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6093196868896484, "epoch": 1.32, "learning_rate": 4.8243636705175175e-05, "loss": 0.5625, "step": 1557, "task_loss": 0.913056492805481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9808435440063477, "epoch": 1.32, "learning_rate": 4.8238940546632854e-05, "loss": 0.79, "step": 1558, "task_loss": 0.6423010230064392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4502407908439636, "epoch": 1.32, "learning_rate": 4.823424438809054e-05, "loss": 0.6405, "step": 1559, "task_loss": 1.2359646558761597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7274014949798584, "epoch": 1.32, "learning_rate": 4.8229548229548234e-05, "loss": 0.6811, "step": 1560, "task_loss": 1.3714863061904907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1834542751312256, "epoch": 1.32, "learning_rate": 4.822485207100592e-05, "loss": 0.7573, "step": 1561, "task_loss": 0.9782078266143799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30421096086502075, "epoch": 1.32, "learning_rate": 4.8220155912463606e-05, "loss": 0.4143, "step": 1562, "task_loss": 0.08327803015708923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7491362690925598, "epoch": 1.32, "learning_rate": 4.821545975392129e-05, "loss": 0.7168, "step": 1563, "task_loss": 0.5314494967460632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3689817488193512, "epoch": 1.32, "learning_rate": 4.8210763595378986e-05, "loss": 0.5334, "step": 1564, "task_loss": 1.0091383457183838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6672305464744568, "epoch": 1.32, "learning_rate": 4.820606743683667e-05, "loss": 0.5295, "step": 1565, "task_loss": 0.3639557957649231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8660675287246704, "epoch": 1.32, "learning_rate": 4.820137127829435e-05, "loss": 0.7491, "step": 1566, "task_loss": 0.7016577124595642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5591275691986084, "epoch": 1.32, "learning_rate": 4.8196675119752045e-05, "loss": 0.5908, "step": 1567, "task_loss": 0.5133832097053528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5554888844490051, "epoch": 1.33, "learning_rate": 4.819197896120973e-05, "loss": 0.5304, "step": 1568, "task_loss": 0.6774203181266785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49192625284194946, "epoch": 1.33, "learning_rate": 4.8187282802667424e-05, "loss": 0.6862, "step": 1569, "task_loss": 0.6872148513793945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3258918225765228, "epoch": 1.33, "learning_rate": 4.818258664412511e-05, "loss": 0.5044, "step": 1570, "task_loss": 0.6227383613586426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6040571331977844, "epoch": 1.33, "learning_rate": 4.81778904855828e-05, "loss": 0.5301, "step": 1571, "task_loss": 0.5737279653549194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5569165945053101, "epoch": 1.33, "learning_rate": 4.817319432704048e-05, "loss": 0.44, "step": 1572, "task_loss": 0.4166593849658966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4880947470664978, "epoch": 1.33, "learning_rate": 4.816849816849817e-05, "loss": 0.5419, "step": 1573, "task_loss": 0.13297852873802185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5671811699867249, "epoch": 1.33, "learning_rate": 4.816380200995586e-05, "loss": 0.4717, "step": 1574, "task_loss": 0.6271976828575134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4290007948875427, "epoch": 1.33, "learning_rate": 4.815910585141354e-05, "loss": 0.5386, "step": 1575, "task_loss": 1.2407691478729248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41234272718429565, "epoch": 1.33, "learning_rate": 4.8154409692871235e-05, "loss": 0.474, "step": 1576, "task_loss": 0.18758268654346466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3833475112915039, "epoch": 1.33, "learning_rate": 4.814971353432892e-05, "loss": 0.3943, "step": 1577, "task_loss": 0.26754525303840637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8405159711837769, "epoch": 1.33, "learning_rate": 4.814501737578661e-05, "loss": 0.8073, "step": 1578, "task_loss": 0.13382793962955475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6220155954360962, "epoch": 1.33, "learning_rate": 4.8140321217244294e-05, "loss": 0.7351, "step": 1579, "task_loss": 1.399150013923645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8404505252838135, "epoch": 1.34, "learning_rate": 4.813562505870198e-05, "loss": 0.6545, "step": 1580, "task_loss": 0.8882309198379517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6380572319030762, "epoch": 1.34, "learning_rate": 4.8130928900159674e-05, "loss": 0.7127, "step": 1581, "task_loss": 0.9962065815925598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5462014079093933, "epoch": 1.34, "learning_rate": 4.812623274161736e-05, "loss": 0.6195, "step": 1582, "task_loss": 1.1186436414718628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3981356620788574, "epoch": 1.34, "learning_rate": 4.8121536583075046e-05, "loss": 0.5196, "step": 1583, "task_loss": 0.7639935612678528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.851836621761322, "epoch": 1.34, "learning_rate": 4.811684042453273e-05, "loss": 0.7196, "step": 1584, "task_loss": 1.3613327741622925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0095330476760864, "epoch": 1.34, "learning_rate": 4.811214426599042e-05, "loss": 0.7142, "step": 1585, "task_loss": 0.5311954021453857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6523898839950562, "epoch": 1.34, "learning_rate": 4.810744810744811e-05, "loss": 0.5961, "step": 1586, "task_loss": 0.28239595890045166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5329494476318359, "epoch": 1.34, "learning_rate": 4.81027519489058e-05, "loss": 0.5672, "step": 1587, "task_loss": 0.3667519986629486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48556065559387207, "epoch": 1.34, "learning_rate": 4.8098055790363485e-05, "loss": 0.6176, "step": 1588, "task_loss": 0.36737215518951416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38980481028556824, "epoch": 1.34, "learning_rate": 4.809335963182117e-05, "loss": 0.5574, "step": 1589, "task_loss": 0.5701003670692444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4722679853439331, "epoch": 1.34, "learning_rate": 4.808866347327886e-05, "loss": 0.6254, "step": 1590, "task_loss": 0.5817776918411255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7635914087295532, "epoch": 1.34, "learning_rate": 4.808396731473655e-05, "loss": 0.564, "step": 1591, "task_loss": 0.5323767066001892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5559121370315552, "epoch": 1.35, "learning_rate": 4.807927115619423e-05, "loss": 0.4693, "step": 1592, "task_loss": 0.9551624655723572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7257329225540161, "epoch": 1.35, "learning_rate": 4.8074574997651923e-05, "loss": 0.5297, "step": 1593, "task_loss": 0.8044682741165161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4701245427131653, "epoch": 1.35, "learning_rate": 4.806987883910961e-05, "loss": 0.6486, "step": 1594, "task_loss": 0.8444935083389282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43521493673324585, "epoch": 1.35, "learning_rate": 4.80651826805673e-05, "loss": 0.6163, "step": 1595, "task_loss": 0.12006933242082596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4179456830024719, "epoch": 1.35, "learning_rate": 4.806048652202498e-05, "loss": 0.7941, "step": 1596, "task_loss": 1.2519679069519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5762587785720825, "epoch": 1.35, "learning_rate": 4.8055790363482676e-05, "loss": 0.6821, "step": 1597, "task_loss": 0.8445903062820435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4906913936138153, "epoch": 1.35, "learning_rate": 4.805109420494036e-05, "loss": 0.5564, "step": 1598, "task_loss": 0.6723305583000183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.871577799320221, "epoch": 1.35, "learning_rate": 4.804639804639805e-05, "loss": 0.6665, "step": 1599, "task_loss": 1.411345362663269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47707292437553406, "epoch": 1.35, "learning_rate": 4.804170188785574e-05, "loss": 0.6445, "step": 1600, "task_loss": 0.6512353420257568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47146961092948914, "epoch": 1.35, "learning_rate": 4.803700572931342e-05, "loss": 0.7106, "step": 1601, "task_loss": 1.0617156028747559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5479139685630798, "epoch": 1.35, "learning_rate": 4.8032309570771114e-05, "loss": 0.7178, "step": 1602, "task_loss": 0.43177318572998047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7380688190460205, "epoch": 1.35, "learning_rate": 4.80276134122288e-05, "loss": 0.5782, "step": 1603, "task_loss": 0.6076660752296448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4464404582977295, "epoch": 1.36, "learning_rate": 4.802291725368649e-05, "loss": 0.6821, "step": 1604, "task_loss": 0.3178783059120178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6880927085876465, "epoch": 1.36, "learning_rate": 4.801822109514417e-05, "loss": 0.6048, "step": 1605, "task_loss": 0.12384650856256485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6180582046508789, "epoch": 1.36, "learning_rate": 4.801352493660186e-05, "loss": 0.6136, "step": 1606, "task_loss": 0.7588778734207153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6804318428039551, "epoch": 1.36, "learning_rate": 4.800882877805955e-05, "loss": 0.7623, "step": 1607, "task_loss": 1.3524771928787231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8903772830963135, "epoch": 1.36, "learning_rate": 4.800413261951724e-05, "loss": 0.6997, "step": 1608, "task_loss": 0.9779390692710876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49610838294029236, "epoch": 1.36, "learning_rate": 4.7999436460974925e-05, "loss": 0.4776, "step": 1609, "task_loss": 1.0638130903244019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.026877999305725, "epoch": 1.36, "learning_rate": 4.799474030243261e-05, "loss": 0.8375, "step": 1610, "task_loss": 1.1214735507965088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9383758306503296, "epoch": 1.36, "learning_rate": 4.79900441438903e-05, "loss": 0.8028, "step": 1611, "task_loss": 0.9082462787628174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5836845636367798, "epoch": 1.36, "learning_rate": 4.798534798534799e-05, "loss": 0.6073, "step": 1612, "task_loss": 0.6046208143234253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5601679086685181, "epoch": 1.36, "learning_rate": 4.798065182680567e-05, "loss": 0.6591, "step": 1613, "task_loss": 0.6604276895523071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8007560968399048, "epoch": 1.36, "learning_rate": 4.7975955668263364e-05, "loss": 0.6474, "step": 1614, "task_loss": 1.561754584312439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3225545883178711, "epoch": 1.36, "learning_rate": 4.797125950972105e-05, "loss": 0.4783, "step": 1615, "task_loss": 0.5013390183448792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.524358868598938, "epoch": 1.37, "learning_rate": 4.7966563351178736e-05, "loss": 0.4974, "step": 1616, "task_loss": 0.4313659071922302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5829795002937317, "epoch": 1.37, "learning_rate": 4.796186719263643e-05, "loss": 0.51, "step": 1617, "task_loss": 0.7035031318664551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44466525316238403, "epoch": 1.37, "learning_rate": 4.795717103409411e-05, "loss": 0.5533, "step": 1618, "task_loss": 1.0078312158584595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.732083797454834, "epoch": 1.37, "learning_rate": 4.79524748755518e-05, "loss": 0.6167, "step": 1619, "task_loss": 0.5643240213394165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3727112114429474, "epoch": 1.37, "learning_rate": 4.794777871700949e-05, "loss": 0.5936, "step": 1620, "task_loss": 0.07037147134542465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9128540754318237, "epoch": 1.37, "learning_rate": 4.794308255846718e-05, "loss": 0.6988, "step": 1621, "task_loss": 0.9669749736785889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42849409580230713, "epoch": 1.37, "learning_rate": 4.793838639992486e-05, "loss": 0.5357, "step": 1622, "task_loss": 0.619621753692627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43831974267959595, "epoch": 1.37, "learning_rate": 4.793369024138255e-05, "loss": 0.4293, "step": 1623, "task_loss": 1.2670722007751465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8166709542274475, "epoch": 1.37, "learning_rate": 4.792899408284024e-05, "loss": 0.5972, "step": 1624, "task_loss": 1.1154873371124268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4521436095237732, "epoch": 1.37, "learning_rate": 4.792429792429793e-05, "loss": 0.6869, "step": 1625, "task_loss": 0.6481726169586182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6774976253509521, "epoch": 1.37, "learning_rate": 4.791960176575561e-05, "loss": 0.6958, "step": 1626, "task_loss": 0.5851696729660034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5548114776611328, "epoch": 1.38, "learning_rate": 4.79149056072133e-05, "loss": 0.5028, "step": 1627, "task_loss": 0.9559207558631897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6879181265830994, "epoch": 1.38, "learning_rate": 4.791020944867099e-05, "loss": 0.4863, "step": 1628, "task_loss": 0.6026318073272705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6508862376213074, "epoch": 1.38, "learning_rate": 4.790551329012868e-05, "loss": 0.6992, "step": 1629, "task_loss": 0.7174429893493652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.620749294757843, "epoch": 1.38, "learning_rate": 4.7900817131586365e-05, "loss": 0.7135, "step": 1630, "task_loss": 0.8409253358840942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5580267906188965, "epoch": 1.38, "learning_rate": 4.789612097304405e-05, "loss": 0.5273, "step": 1631, "task_loss": 1.1636685132980347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5073383450508118, "epoch": 1.38, "learning_rate": 4.789142481450174e-05, "loss": 0.6805, "step": 1632, "task_loss": 1.0561354160308838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40462905168533325, "epoch": 1.38, "learning_rate": 4.788672865595943e-05, "loss": 0.4743, "step": 1633, "task_loss": 0.1622830629348755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5595773458480835, "epoch": 1.38, "learning_rate": 4.788203249741712e-05, "loss": 0.7486, "step": 1634, "task_loss": 0.9982402324676514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8400516510009766, "epoch": 1.38, "learning_rate": 4.7877336338874804e-05, "loss": 0.6852, "step": 1635, "task_loss": 0.6955478191375732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5773741006851196, "epoch": 1.38, "learning_rate": 4.787264018033249e-05, "loss": 0.63, "step": 1636, "task_loss": 1.4299110174179077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4126572906970978, "epoch": 1.38, "learning_rate": 4.7867944021790177e-05, "loss": 0.7988, "step": 1637, "task_loss": 0.3044285774230957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7871910333633423, "epoch": 1.38, "learning_rate": 4.786324786324787e-05, "loss": 0.7969, "step": 1638, "task_loss": 1.3701541423797607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6560972332954407, "epoch": 1.39, "learning_rate": 4.785855170470555e-05, "loss": 0.6326, "step": 1639, "task_loss": 0.4399296045303345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7406031489372253, "epoch": 1.39, "learning_rate": 4.785385554616324e-05, "loss": 0.6696, "step": 1640, "task_loss": 1.4316532611846924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33120280504226685, "epoch": 1.39, "learning_rate": 4.784915938762093e-05, "loss": 0.5189, "step": 1641, "task_loss": 0.6789448261260986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24124902486801147, "epoch": 1.39, "learning_rate": 4.7844463229078615e-05, "loss": 0.53, "step": 1642, "task_loss": 0.0282899122685194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7874600887298584, "epoch": 1.39, "learning_rate": 4.78397670705363e-05, "loss": 0.7338, "step": 1643, "task_loss": 0.5115172863006592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8318591713905334, "epoch": 1.39, "learning_rate": 4.783507091199399e-05, "loss": 0.5552, "step": 1644, "task_loss": 0.71821528673172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8899476528167725, "epoch": 1.39, "learning_rate": 4.783037475345168e-05, "loss": 0.6241, "step": 1645, "task_loss": 0.8202563524246216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39803147315979004, "epoch": 1.39, "learning_rate": 4.782567859490937e-05, "loss": 0.6997, "step": 1646, "task_loss": 0.29781612753868103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6491105556488037, "epoch": 1.39, "learning_rate": 4.7820982436367054e-05, "loss": 0.5621, "step": 1647, "task_loss": 1.2112702131271362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.694891095161438, "epoch": 1.39, "learning_rate": 4.781628627782474e-05, "loss": 0.5743, "step": 1648, "task_loss": 0.7192654013633728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.60300612449646, "epoch": 1.39, "learning_rate": 4.7811590119282426e-05, "loss": 0.7608, "step": 1649, "task_loss": 1.2212975025177002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4871859550476074, "epoch": 1.39, "learning_rate": 4.780689396074012e-05, "loss": 0.5695, "step": 1650, "task_loss": 0.7338263988494873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5632951259613037, "epoch": 1.4, "learning_rate": 4.7802197802197806e-05, "loss": 0.6437, "step": 1651, "task_loss": 0.8542407155036926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4331757426261902, "epoch": 1.4, "learning_rate": 4.779750164365549e-05, "loss": 0.6017, "step": 1652, "task_loss": 0.19764827191829681 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37723830342292786, "epoch": 1.4, "learning_rate": 4.779280548511318e-05, "loss": 0.5301, "step": 1653, "task_loss": 0.24991202354431152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48397183418273926, "epoch": 1.4, "learning_rate": 4.7788109326570865e-05, "loss": 0.5826, "step": 1654, "task_loss": 1.114715576171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39961105585098267, "epoch": 1.4, "learning_rate": 4.778341316802856e-05, "loss": 0.535, "step": 1655, "task_loss": 0.09917449206113815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8142891526222229, "epoch": 1.4, "learning_rate": 4.777871700948624e-05, "loss": 0.6995, "step": 1656, "task_loss": 0.49199044704437256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3938007354736328, "epoch": 1.4, "learning_rate": 4.777402085094393e-05, "loss": 0.6571, "step": 1657, "task_loss": 1.3335747718811035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33205628395080566, "epoch": 1.4, "learning_rate": 4.776932469240162e-05, "loss": 0.596, "step": 1658, "task_loss": 0.1569209098815918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6527595520019531, "epoch": 1.4, "learning_rate": 4.776462853385931e-05, "loss": 0.4447, "step": 1659, "task_loss": 1.12248694896698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30100762844085693, "epoch": 1.4, "learning_rate": 4.775993237531699e-05, "loss": 0.4775, "step": 1660, "task_loss": 0.07592938095331192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7510749101638794, "epoch": 1.4, "learning_rate": 4.7755236216774676e-05, "loss": 0.7437, "step": 1661, "task_loss": 0.7054663896560669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49287593364715576, "epoch": 1.4, "learning_rate": 4.775054005823237e-05, "loss": 0.4679, "step": 1662, "task_loss": 0.3924923241138458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6776936054229736, "epoch": 1.41, "learning_rate": 4.7745843899690055e-05, "loss": 0.6136, "step": 1663, "task_loss": 0.8403178453445435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8054499626159668, "epoch": 1.41, "learning_rate": 4.774114774114775e-05, "loss": 0.7737, "step": 1664, "task_loss": 0.5296772718429565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4818650484085083, "epoch": 1.41, "learning_rate": 4.773645158260543e-05, "loss": 0.5517, "step": 1665, "task_loss": 0.6134045720100403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9701954126358032, "epoch": 1.41, "learning_rate": 4.773175542406312e-05, "loss": 0.677, "step": 1666, "task_loss": 1.0313466787338257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.829964280128479, "epoch": 1.41, "learning_rate": 4.772705926552081e-05, "loss": 0.7897, "step": 1667, "task_loss": 1.4926754236221313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39878225326538086, "epoch": 1.41, "learning_rate": 4.7722363106978494e-05, "loss": 0.4225, "step": 1668, "task_loss": 0.5044861435890198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25605255365371704, "epoch": 1.41, "learning_rate": 4.771766694843618e-05, "loss": 0.4996, "step": 1669, "task_loss": 0.855275571346283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5717999935150146, "epoch": 1.41, "learning_rate": 4.7712970789893866e-05, "loss": 0.6309, "step": 1670, "task_loss": 0.5186813473701477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3571189343929291, "epoch": 1.41, "learning_rate": 4.770827463135156e-05, "loss": 0.5238, "step": 1671, "task_loss": 0.7347651124000549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6045377254486084, "epoch": 1.41, "learning_rate": 4.7703578472809246e-05, "loss": 0.7242, "step": 1672, "task_loss": 0.49825671315193176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5678796768188477, "epoch": 1.41, "learning_rate": 4.769888231426693e-05, "loss": 0.7521, "step": 1673, "task_loss": 0.8094935417175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43706828355789185, "epoch": 1.41, "learning_rate": 4.769418615572462e-05, "loss": 0.5182, "step": 1674, "task_loss": 0.2075897604227066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.440432608127594, "epoch": 1.42, "learning_rate": 4.7689489997182305e-05, "loss": 0.5479, "step": 1675, "task_loss": 0.36550334095954895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47649237513542175, "epoch": 1.42, "learning_rate": 4.768479383864e-05, "loss": 0.4358, "step": 1676, "task_loss": 1.0703203678131104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9140738248825073, "epoch": 1.42, "learning_rate": 4.7680097680097684e-05, "loss": 0.6524, "step": 1677, "task_loss": 0.4675435721874237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6960176229476929, "epoch": 1.42, "learning_rate": 4.767540152155537e-05, "loss": 0.7557, "step": 1678, "task_loss": 0.7790500521659851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6246174573898315, "epoch": 1.42, "learning_rate": 4.767070536301306e-05, "loss": 0.622, "step": 1679, "task_loss": 0.9733368754386902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5217020511627197, "epoch": 1.42, "learning_rate": 4.7666009204470743e-05, "loss": 0.5465, "step": 1680, "task_loss": 1.1167904138565063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6869937777519226, "epoch": 1.42, "learning_rate": 4.7661313045928437e-05, "loss": 0.5136, "step": 1681, "task_loss": 0.9331859350204468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4757658541202545, "epoch": 1.42, "learning_rate": 4.7656616887386116e-05, "loss": 0.4009, "step": 1682, "task_loss": 0.2403227984905243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7355430126190186, "epoch": 1.42, "learning_rate": 4.765192072884381e-05, "loss": 0.4306, "step": 1683, "task_loss": 0.23138339817523956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34489649534225464, "epoch": 1.42, "learning_rate": 4.7647224570301496e-05, "loss": 0.455, "step": 1684, "task_loss": 0.9797009229660034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7930319905281067, "epoch": 1.42, "learning_rate": 4.764252841175918e-05, "loss": 0.7432, "step": 1685, "task_loss": 0.47484952211380005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6724337935447693, "epoch": 1.42, "learning_rate": 4.763783225321687e-05, "loss": 0.6212, "step": 1686, "task_loss": 0.9217349290847778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3457496166229248, "epoch": 1.43, "learning_rate": 4.7633136094674555e-05, "loss": 0.6272, "step": 1687, "task_loss": 0.6488296985626221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47160646319389343, "epoch": 1.43, "learning_rate": 4.762843993613225e-05, "loss": 0.6507, "step": 1688, "task_loss": 0.5439403653144836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35220447182655334, "epoch": 1.43, "learning_rate": 4.7623743777589934e-05, "loss": 0.5554, "step": 1689, "task_loss": 0.5277451276779175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4568077325820923, "epoch": 1.43, "learning_rate": 4.761904761904762e-05, "loss": 0.6937, "step": 1690, "task_loss": 0.38953086733818054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5074638724327087, "epoch": 1.43, "learning_rate": 4.761435146050531e-05, "loss": 0.5773, "step": 1691, "task_loss": 1.2159000635147095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9051076173782349, "epoch": 1.43, "learning_rate": 4.7609655301963e-05, "loss": 0.504, "step": 1692, "task_loss": 1.0934655666351318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35757479071617126, "epoch": 1.43, "learning_rate": 4.7604959143420686e-05, "loss": 0.5214, "step": 1693, "task_loss": 0.09512335807085037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.340162456035614, "epoch": 1.43, "learning_rate": 4.760026298487837e-05, "loss": 0.6442, "step": 1694, "task_loss": 0.09076585620641708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4110061526298523, "epoch": 1.43, "learning_rate": 4.759556682633606e-05, "loss": 0.623, "step": 1695, "task_loss": 1.1287000179290771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5678776502609253, "epoch": 1.43, "learning_rate": 4.7590870667793745e-05, "loss": 0.6345, "step": 1696, "task_loss": 1.043823480606079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7658879160881042, "epoch": 1.43, "learning_rate": 4.758617450925144e-05, "loss": 0.6415, "step": 1697, "task_loss": 1.514686107635498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42536023259162903, "epoch": 1.44, "learning_rate": 4.7581478350709125e-05, "loss": 0.6042, "step": 1698, "task_loss": 0.9832414388656616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7201156616210938, "epoch": 1.44, "learning_rate": 4.757678219216681e-05, "loss": 0.7188, "step": 1699, "task_loss": 2.0409693717956543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47581902146339417, "epoch": 1.44, "learning_rate": 4.75720860336245e-05, "loss": 0.9033, "step": 1700, "task_loss": 0.7498282790184021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3424775004386902, "epoch": 1.44, "learning_rate": 4.7567389875082184e-05, "loss": 0.5107, "step": 1701, "task_loss": 0.9757051467895508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34555691480636597, "epoch": 1.44, "learning_rate": 4.756269371653988e-05, "loss": 0.54, "step": 1702, "task_loss": 0.7055360078811646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38466912508010864, "epoch": 1.44, "learning_rate": 4.7557997557997556e-05, "loss": 0.5907, "step": 1703, "task_loss": 0.8687433004379272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4827705919742584, "epoch": 1.44, "learning_rate": 4.755330139945525e-05, "loss": 0.5367, "step": 1704, "task_loss": 0.34060388803482056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5884590148925781, "epoch": 1.44, "learning_rate": 4.7548605240912936e-05, "loss": 0.6298, "step": 1705, "task_loss": 0.9137607216835022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5613066554069519, "epoch": 1.44, "learning_rate": 4.754390908237062e-05, "loss": 0.63, "step": 1706, "task_loss": 1.448259711265564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6658216714859009, "epoch": 1.44, "learning_rate": 4.7539212923828315e-05, "loss": 0.6147, "step": 1707, "task_loss": 0.6749851703643799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3519976735115051, "epoch": 1.44, "learning_rate": 4.7534516765285995e-05, "loss": 0.456, "step": 1708, "task_loss": 0.21104562282562256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2317219972610474, "epoch": 1.44, "learning_rate": 4.752982060674369e-05, "loss": 0.6719, "step": 1709, "task_loss": 0.8495370149612427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6735179424285889, "epoch": 1.45, "learning_rate": 4.7525124448201374e-05, "loss": 0.5969, "step": 1710, "task_loss": 0.5928895473480225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7466335892677307, "epoch": 1.45, "learning_rate": 4.752042828965906e-05, "loss": 0.8081, "step": 1711, "task_loss": 1.717422604560852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6467211842536926, "epoch": 1.45, "learning_rate": 4.751573213111675e-05, "loss": 0.6802, "step": 1712, "task_loss": 0.9554148316383362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30561161041259766, "epoch": 1.45, "learning_rate": 4.751103597257443e-05, "loss": 0.6214, "step": 1713, "task_loss": 0.5849863886833191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.532687783241272, "epoch": 1.45, "learning_rate": 4.7506339814032126e-05, "loss": 0.5957, "step": 1714, "task_loss": 1.7920550107955933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45694610476493835, "epoch": 1.45, "learning_rate": 4.750164365548981e-05, "loss": 0.5562, "step": 1715, "task_loss": 0.49643829464912415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41283807158470154, "epoch": 1.45, "learning_rate": 4.74969474969475e-05, "loss": 0.5554, "step": 1716, "task_loss": 0.20359176397323608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5782396793365479, "epoch": 1.45, "learning_rate": 4.7492251338405185e-05, "loss": 0.5615, "step": 1717, "task_loss": 0.9589313268661499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5847616195678711, "epoch": 1.45, "learning_rate": 4.748755517986287e-05, "loss": 0.6409, "step": 1718, "task_loss": 1.4182993173599243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7813501358032227, "epoch": 1.45, "learning_rate": 4.7482859021320565e-05, "loss": 0.6889, "step": 1719, "task_loss": 0.7253593802452087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6458130478858948, "epoch": 1.45, "learning_rate": 4.7478162862778244e-05, "loss": 0.5989, "step": 1720, "task_loss": 1.2458233833312988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5860913991928101, "epoch": 1.45, "learning_rate": 4.747346670423594e-05, "loss": 0.4266, "step": 1721, "task_loss": 1.0707181692123413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4506617784500122, "epoch": 1.46, "learning_rate": 4.7468770545693624e-05, "loss": 0.6675, "step": 1722, "task_loss": 0.7505088448524475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41061732172966003, "epoch": 1.46, "learning_rate": 4.746407438715132e-05, "loss": 0.5776, "step": 1723, "task_loss": 0.5155606269836426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5759274363517761, "epoch": 1.46, "learning_rate": 4.7459378228609e-05, "loss": 0.5637, "step": 1724, "task_loss": 0.7650868892669678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7797831296920776, "epoch": 1.46, "learning_rate": 4.745468207006668e-05, "loss": 0.8357, "step": 1725, "task_loss": 1.4964967966079712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5510580539703369, "epoch": 1.46, "learning_rate": 4.7449985911524376e-05, "loss": 0.6004, "step": 1726, "task_loss": 0.9299523234367371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.605677604675293, "epoch": 1.46, "learning_rate": 4.744528975298206e-05, "loss": 0.5736, "step": 1727, "task_loss": 0.3727736473083496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35841381549835205, "epoch": 1.46, "learning_rate": 4.7440593594439755e-05, "loss": 0.3982, "step": 1728, "task_loss": 0.2603422999382019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42003220319747925, "epoch": 1.46, "learning_rate": 4.7435897435897435e-05, "loss": 0.5969, "step": 1729, "task_loss": 0.25327152013778687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8262138366699219, "epoch": 1.46, "learning_rate": 4.743120127735513e-05, "loss": 0.5673, "step": 1730, "task_loss": 0.4661247432231903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.57587730884552, "epoch": 1.46, "learning_rate": 4.7426505118812814e-05, "loss": 0.5258, "step": 1731, "task_loss": 0.44535523653030396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6655893921852112, "epoch": 1.46, "learning_rate": 4.74218089602705e-05, "loss": 0.6481, "step": 1732, "task_loss": 0.48270872235298157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5686538815498352, "epoch": 1.46, "learning_rate": 4.741711280172819e-05, "loss": 0.8111, "step": 1733, "task_loss": 0.9028182029724121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5964937210083008, "epoch": 1.47, "learning_rate": 4.7412416643185874e-05, "loss": 0.5296, "step": 1734, "task_loss": 1.5828986167907715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5032129883766174, "epoch": 1.47, "learning_rate": 4.740772048464357e-05, "loss": 0.4076, "step": 1735, "task_loss": 0.3037869334220886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7859777212142944, "epoch": 1.47, "learning_rate": 4.740302432610125e-05, "loss": 0.537, "step": 1736, "task_loss": 0.9161178469657898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8133358955383301, "epoch": 1.47, "learning_rate": 4.739832816755894e-05, "loss": 0.6102, "step": 1737, "task_loss": 0.42718926072120667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5305310487747192, "epoch": 1.47, "learning_rate": 4.7393632009016626e-05, "loss": 0.5604, "step": 1738, "task_loss": 1.20778226852417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4829508662223816, "epoch": 1.47, "learning_rate": 4.738893585047431e-05, "loss": 0.6428, "step": 1739, "task_loss": 0.4437018632888794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48079660534858704, "epoch": 1.47, "learning_rate": 4.7384239691932005e-05, "loss": 0.5489, "step": 1740, "task_loss": 0.27588245272636414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37160369753837585, "epoch": 1.47, "learning_rate": 4.737954353338969e-05, "loss": 0.5273, "step": 1741, "task_loss": 1.4583673477172852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5649614930152893, "epoch": 1.47, "learning_rate": 4.737484737484738e-05, "loss": 0.5959, "step": 1742, "task_loss": 1.7888894081115723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.783892035484314, "epoch": 1.47, "learning_rate": 4.7370151216305064e-05, "loss": 0.6013, "step": 1743, "task_loss": 1.0030874013900757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4861801564693451, "epoch": 1.47, "learning_rate": 4.736545505776275e-05, "loss": 0.5325, "step": 1744, "task_loss": 0.582534909248352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40477079153060913, "epoch": 1.47, "learning_rate": 4.7360758899220444e-05, "loss": 0.667, "step": 1745, "task_loss": 0.11448536068201065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7338697910308838, "epoch": 1.48, "learning_rate": 4.735606274067812e-05, "loss": 0.6451, "step": 1746, "task_loss": 1.7985705137252808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7247586846351624, "epoch": 1.48, "learning_rate": 4.7351366582135816e-05, "loss": 0.6103, "step": 1747, "task_loss": 0.43474605679512024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4819464683532715, "epoch": 1.48, "learning_rate": 4.73466704235935e-05, "loss": 0.6142, "step": 1748, "task_loss": 1.1048026084899902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7888747453689575, "epoch": 1.48, "learning_rate": 4.734197426505119e-05, "loss": 0.6468, "step": 1749, "task_loss": 1.1516802310943604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3399815261363983, "epoch": 1.48, "learning_rate": 4.7337278106508875e-05, "loss": 0.4871, "step": 1750, "task_loss": 0.8100622892379761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8505150675773621, "epoch": 1.48, "learning_rate": 4.733258194796656e-05, "loss": 0.5982, "step": 1751, "task_loss": 1.3673921823501587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30416804552078247, "epoch": 1.48, "learning_rate": 4.7327885789424255e-05, "loss": 0.5303, "step": 1752, "task_loss": 0.2728438973426819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33281874656677246, "epoch": 1.48, "learning_rate": 4.732318963088194e-05, "loss": 0.4996, "step": 1753, "task_loss": 0.4237552881240845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5973111391067505, "epoch": 1.48, "learning_rate": 4.7318493472339634e-05, "loss": 0.5576, "step": 1754, "task_loss": 0.5479548573493958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.633161187171936, "epoch": 1.48, "learning_rate": 4.7313797313797314e-05, "loss": 0.6279, "step": 1755, "task_loss": 0.8390459418296814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6329614520072937, "epoch": 1.48, "learning_rate": 4.7309101155255e-05, "loss": 0.4845, "step": 1756, "task_loss": 0.7341454029083252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6910947561264038, "epoch": 1.48, "learning_rate": 4.730440499671269e-05, "loss": 0.6181, "step": 1757, "task_loss": 1.3170099258422852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6473876237869263, "epoch": 1.49, "learning_rate": 4.729970883817038e-05, "loss": 0.7139, "step": 1758, "task_loss": 1.1461905241012573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5031329989433289, "epoch": 1.49, "learning_rate": 4.7295012679628066e-05, "loss": 0.616, "step": 1759, "task_loss": 0.5809543132781982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45421546697616577, "epoch": 1.49, "learning_rate": 4.729031652108575e-05, "loss": 0.6293, "step": 1760, "task_loss": 1.873528242111206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6404221653938293, "epoch": 1.49, "learning_rate": 4.7285620362543445e-05, "loss": 0.6145, "step": 1761, "task_loss": 1.179452657699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8770171999931335, "epoch": 1.49, "learning_rate": 4.728092420400113e-05, "loss": 0.7764, "step": 1762, "task_loss": 0.9194822311401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6414819359779358, "epoch": 1.49, "learning_rate": 4.727622804545882e-05, "loss": 0.6281, "step": 1763, "task_loss": 0.4064786434173584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49923160672187805, "epoch": 1.49, "learning_rate": 4.7271531886916504e-05, "loss": 0.5454, "step": 1764, "task_loss": 0.19895872473716736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2579103112220764, "epoch": 1.49, "learning_rate": 4.726683572837419e-05, "loss": 0.4545, "step": 1765, "task_loss": 0.13642677664756775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47878575325012207, "epoch": 1.49, "learning_rate": 4.7262139569831884e-05, "loss": 0.5723, "step": 1766, "task_loss": 0.5587869882583618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.831209659576416, "epoch": 1.49, "learning_rate": 4.725744341128956e-05, "loss": 0.6623, "step": 1767, "task_loss": 0.9326614141464233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6141472458839417, "epoch": 1.49, "learning_rate": 4.7252747252747257e-05, "loss": 0.5907, "step": 1768, "task_loss": 0.8500573635101318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7704299688339233, "epoch": 1.5, "learning_rate": 4.724805109420494e-05, "loss": 0.6076, "step": 1769, "task_loss": 0.8073357343673706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6056827306747437, "epoch": 1.5, "learning_rate": 4.724335493566263e-05, "loss": 0.6262, "step": 1770, "task_loss": 1.2665393352508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5427954792976379, "epoch": 1.5, "learning_rate": 4.723865877712032e-05, "loss": 0.612, "step": 1771, "task_loss": 0.6022998690605164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6004528999328613, "epoch": 1.5, "learning_rate": 4.7233962618578e-05, "loss": 0.5863, "step": 1772, "task_loss": 1.253340721130371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.60202556848526, "epoch": 1.5, "learning_rate": 4.7229266460035695e-05, "loss": 0.6279, "step": 1773, "task_loss": 1.006077527999878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6889404058456421, "epoch": 1.5, "learning_rate": 4.722457030149338e-05, "loss": 0.5472, "step": 1774, "task_loss": 1.4250603914260864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3503163158893585, "epoch": 1.5, "learning_rate": 4.721987414295107e-05, "loss": 0.352, "step": 1775, "task_loss": 0.13469186425209045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7070242166519165, "epoch": 1.5, "learning_rate": 4.7215177984408754e-05, "loss": 0.6332, "step": 1776, "task_loss": 1.0148065090179443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47371625900268555, "epoch": 1.5, "learning_rate": 4.721048182586644e-05, "loss": 0.4939, "step": 1777, "task_loss": 0.784085750579834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5706866383552551, "epoch": 1.5, "learning_rate": 4.7205785667324133e-05, "loss": 0.5514, "step": 1778, "task_loss": 0.38917282223701477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5023443698883057, "epoch": 1.5, "learning_rate": 4.720108950878182e-05, "loss": 0.4471, "step": 1779, "task_loss": 0.24280396103858948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34125107526779175, "epoch": 1.5, "learning_rate": 4.7196393350239506e-05, "loss": 0.5218, "step": 1780, "task_loss": 1.0850849151611328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7441297769546509, "epoch": 1.51, "learning_rate": 4.719169719169719e-05, "loss": 0.7367, "step": 1781, "task_loss": 0.965580403804779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4740007221698761, "epoch": 1.51, "learning_rate": 4.718700103315488e-05, "loss": 0.5956, "step": 1782, "task_loss": 0.47654905915260315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5822681188583374, "epoch": 1.51, "learning_rate": 4.718230487461257e-05, "loss": 0.5995, "step": 1783, "task_loss": 1.0966339111328125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4473939836025238, "epoch": 1.51, "learning_rate": 4.717760871607026e-05, "loss": 0.6485, "step": 1784, "task_loss": 1.4719487428665161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4880802631378174, "epoch": 1.51, "learning_rate": 4.7172912557527945e-05, "loss": 0.6511, "step": 1785, "task_loss": 0.4111056327819824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6888054609298706, "epoch": 1.51, "learning_rate": 4.716821639898563e-05, "loss": 0.5603, "step": 1786, "task_loss": 0.998449444770813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6950190663337708, "epoch": 1.51, "learning_rate": 4.7163520240443324e-05, "loss": 0.592, "step": 1787, "task_loss": 0.5111843347549438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5465474128723145, "epoch": 1.51, "learning_rate": 4.715882408190101e-05, "loss": 0.4871, "step": 1788, "task_loss": 1.3033368587493896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5756257176399231, "epoch": 1.51, "learning_rate": 4.715412792335869e-05, "loss": 0.6091, "step": 1789, "task_loss": 1.8112893104553223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36701101064682007, "epoch": 1.51, "learning_rate": 4.714943176481638e-05, "loss": 0.4259, "step": 1790, "task_loss": 0.42894846200942993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4413785934448242, "epoch": 1.51, "learning_rate": 4.714473560627407e-05, "loss": 0.6071, "step": 1791, "task_loss": 0.27425217628479004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9340062141418457, "epoch": 1.51, "learning_rate": 4.714003944773176e-05, "loss": 0.7747, "step": 1792, "task_loss": 1.32625150680542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5891784429550171, "epoch": 1.52, "learning_rate": 4.713534328918944e-05, "loss": 0.5958, "step": 1793, "task_loss": 0.1097603514790535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3353285789489746, "epoch": 1.52, "learning_rate": 4.7130647130647135e-05, "loss": 0.4029, "step": 1794, "task_loss": 0.45365604758262634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5513407588005066, "epoch": 1.52, "learning_rate": 4.712595097210482e-05, "loss": 0.6134, "step": 1795, "task_loss": 0.750683605670929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3004615902900696, "epoch": 1.52, "learning_rate": 4.712125481356251e-05, "loss": 0.5012, "step": 1796, "task_loss": 0.299651563167572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33866167068481445, "epoch": 1.52, "learning_rate": 4.7116558655020194e-05, "loss": 0.5481, "step": 1797, "task_loss": 0.5677942037582397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49999064207077026, "epoch": 1.52, "learning_rate": 4.711186249647788e-05, "loss": 0.6617, "step": 1798, "task_loss": 1.4898039102554321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5151455402374268, "epoch": 1.52, "learning_rate": 4.7107166337935574e-05, "loss": 0.5717, "step": 1799, "task_loss": 1.9031875133514404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.629157304763794, "epoch": 1.52, "learning_rate": 4.710247017939326e-05, "loss": 0.4723, "step": 1800, "task_loss": 0.525413453578949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6303220391273499, "epoch": 1.52, "learning_rate": 4.7097774020850946e-05, "loss": 0.587, "step": 1801, "task_loss": 0.3995893597602844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8263447284698486, "epoch": 1.52, "learning_rate": 4.709307786230863e-05, "loss": 0.7051, "step": 1802, "task_loss": 0.3925260305404663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6280578970909119, "epoch": 1.52, "learning_rate": 4.708838170376632e-05, "loss": 0.6614, "step": 1803, "task_loss": 1.6877999305725098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.509369432926178, "epoch": 1.52, "learning_rate": 4.708368554522401e-05, "loss": 0.5877, "step": 1804, "task_loss": 0.31087028980255127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7636853456497192, "epoch": 1.53, "learning_rate": 4.70789893866817e-05, "loss": 0.5501, "step": 1805, "task_loss": 1.8930209875106812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.506576657295227, "epoch": 1.53, "learning_rate": 4.7074293228139385e-05, "loss": 0.5141, "step": 1806, "task_loss": 0.9447920918464661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38242024183273315, "epoch": 1.53, "learning_rate": 4.706959706959707e-05, "loss": 0.407, "step": 1807, "task_loss": 0.8218205571174622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7456769943237305, "epoch": 1.53, "learning_rate": 4.706490091105476e-05, "loss": 0.6892, "step": 1808, "task_loss": 0.8736859560012817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4574276804924011, "epoch": 1.53, "learning_rate": 4.706020475251245e-05, "loss": 0.5243, "step": 1809, "task_loss": 1.7474349737167358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45927631855010986, "epoch": 1.53, "learning_rate": 4.705550859397013e-05, "loss": 0.5602, "step": 1810, "task_loss": 0.2509227991104126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.696647584438324, "epoch": 1.53, "learning_rate": 4.705081243542782e-05, "loss": 0.679, "step": 1811, "task_loss": 0.6372109055519104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7950998544692993, "epoch": 1.53, "learning_rate": 4.704611627688551e-05, "loss": 0.759, "step": 1812, "task_loss": 1.3148596286773682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44484224915504456, "epoch": 1.53, "learning_rate": 4.7041420118343196e-05, "loss": 0.6028, "step": 1813, "task_loss": 0.46240314841270447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6550801992416382, "epoch": 1.53, "learning_rate": 4.703672395980088e-05, "loss": 0.7117, "step": 1814, "task_loss": 0.7309983372688293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9140268564224243, "epoch": 1.53, "learning_rate": 4.703202780125857e-05, "loss": 0.6475, "step": 1815, "task_loss": 0.8600244522094727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48699596524238586, "epoch": 1.53, "learning_rate": 4.702733164271626e-05, "loss": 0.5915, "step": 1816, "task_loss": 1.091271162033081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48988908529281616, "epoch": 1.54, "learning_rate": 4.702263548417395e-05, "loss": 0.6931, "step": 1817, "task_loss": 0.7432475686073303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6374232769012451, "epoch": 1.54, "learning_rate": 4.701793932563164e-05, "loss": 0.6207, "step": 1818, "task_loss": 0.4693811535835266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4014149010181427, "epoch": 1.54, "learning_rate": 4.701324316708932e-05, "loss": 0.6216, "step": 1819, "task_loss": 0.8259322643280029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5405399799346924, "epoch": 1.54, "learning_rate": 4.700854700854701e-05, "loss": 0.6434, "step": 1820, "task_loss": 0.7887128591537476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7954394221305847, "epoch": 1.54, "learning_rate": 4.70038508500047e-05, "loss": 0.665, "step": 1821, "task_loss": 1.2190734148025513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7359347343444824, "epoch": 1.54, "learning_rate": 4.6999154691462387e-05, "loss": 0.6513, "step": 1822, "task_loss": 0.4888728857040405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40065819025039673, "epoch": 1.54, "learning_rate": 4.699445853292007e-05, "loss": 0.5936, "step": 1823, "task_loss": 0.7386007308959961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46547800302505493, "epoch": 1.54, "learning_rate": 4.698976237437776e-05, "loss": 0.6591, "step": 1824, "task_loss": 0.6957730054855347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45333337783813477, "epoch": 1.54, "learning_rate": 4.698506621583545e-05, "loss": 0.4594, "step": 1825, "task_loss": 0.4932689070701599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5324752330780029, "epoch": 1.54, "learning_rate": 4.698037005729314e-05, "loss": 0.5061, "step": 1826, "task_loss": 1.251338005065918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28957247734069824, "epoch": 1.54, "learning_rate": 4.697567389875082e-05, "loss": 0.4282, "step": 1827, "task_loss": 0.20220570266246796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5788820385932922, "epoch": 1.54, "learning_rate": 4.697097774020851e-05, "loss": 0.6104, "step": 1828, "task_loss": 1.4940811395645142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3608841896057129, "epoch": 1.55, "learning_rate": 4.69662815816662e-05, "loss": 0.4642, "step": 1829, "task_loss": 0.7415425777435303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5830626487731934, "epoch": 1.55, "learning_rate": 4.696158542312389e-05, "loss": 0.5703, "step": 1830, "task_loss": 1.2234995365142822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4922773241996765, "epoch": 1.55, "learning_rate": 4.695688926458158e-05, "loss": 0.6797, "step": 1831, "task_loss": 0.7446616888046265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5748810768127441, "epoch": 1.55, "learning_rate": 4.6952193106039264e-05, "loss": 0.5767, "step": 1832, "task_loss": 0.940068781375885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4408746361732483, "epoch": 1.55, "learning_rate": 4.694749694749695e-05, "loss": 0.4893, "step": 1833, "task_loss": 0.862897515296936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44466403126716614, "epoch": 1.55, "learning_rate": 4.6942800788954636e-05, "loss": 0.4521, "step": 1834, "task_loss": 1.4267512559890747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6717203259468079, "epoch": 1.55, "learning_rate": 4.693810463041233e-05, "loss": 0.5785, "step": 1835, "task_loss": 0.517787754535675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7684582471847534, "epoch": 1.55, "learning_rate": 4.693340847187001e-05, "loss": 0.6046, "step": 1836, "task_loss": 1.2440636157989502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38898685574531555, "epoch": 1.55, "learning_rate": 4.69287123133277e-05, "loss": 0.5795, "step": 1837, "task_loss": 0.4221350848674774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4502114951610565, "epoch": 1.55, "learning_rate": 4.692401615478539e-05, "loss": 0.521, "step": 1838, "task_loss": 0.6535466313362122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4455365538597107, "epoch": 1.55, "learning_rate": 4.6919319996243075e-05, "loss": 0.5997, "step": 1839, "task_loss": 0.5845792889595032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28955110907554626, "epoch": 1.56, "learning_rate": 4.691462383770076e-05, "loss": 0.4973, "step": 1840, "task_loss": 1.0131090879440308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6053062081336975, "epoch": 1.56, "learning_rate": 4.690992767915845e-05, "loss": 0.5397, "step": 1841, "task_loss": 0.8996180295944214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45658376812934875, "epoch": 1.56, "learning_rate": 4.690523152061614e-05, "loss": 0.5595, "step": 1842, "task_loss": 1.2552802562713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4256875813007355, "epoch": 1.56, "learning_rate": 4.690053536207383e-05, "loss": 0.5796, "step": 1843, "task_loss": 0.5388075709342957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5713492035865784, "epoch": 1.56, "learning_rate": 4.689583920353151e-05, "loss": 0.4867, "step": 1844, "task_loss": 0.2089112102985382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.622326135635376, "epoch": 1.56, "learning_rate": 4.68911430449892e-05, "loss": 0.5923, "step": 1845, "task_loss": 0.5002516508102417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6320657730102539, "epoch": 1.56, "learning_rate": 4.6886446886446886e-05, "loss": 0.6553, "step": 1846, "task_loss": 0.8069219589233398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6048786640167236, "epoch": 1.56, "learning_rate": 4.688175072790458e-05, "loss": 0.4569, "step": 1847, "task_loss": 0.9808427691459656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7076330184936523, "epoch": 1.56, "learning_rate": 4.6877054569362265e-05, "loss": 0.5645, "step": 1848, "task_loss": 0.6833792924880981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6158542633056641, "epoch": 1.56, "learning_rate": 4.687235841081995e-05, "loss": 0.5772, "step": 1849, "task_loss": 1.5533432960510254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5241901874542236, "epoch": 1.56, "learning_rate": 4.686766225227764e-05, "loss": 0.4905, "step": 1850, "task_loss": 0.6007300019264221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6578302383422852, "epoch": 1.56, "learning_rate": 4.6862966093735324e-05, "loss": 0.5248, "step": 1851, "task_loss": 0.686032772064209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4665876030921936, "epoch": 1.57, "learning_rate": 4.685826993519302e-05, "loss": 0.5202, "step": 1852, "task_loss": 0.8151183724403381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9610785841941833, "epoch": 1.57, "learning_rate": 4.68535737766507e-05, "loss": 0.6477, "step": 1853, "task_loss": 1.177754521369934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5598584413528442, "epoch": 1.57, "learning_rate": 4.684887761810839e-05, "loss": 0.626, "step": 1854, "task_loss": 0.7493035793304443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5410453081130981, "epoch": 1.57, "learning_rate": 4.6844181459566076e-05, "loss": 0.6075, "step": 1855, "task_loss": 0.4986797869205475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24422243237495422, "epoch": 1.57, "learning_rate": 4.683948530102377e-05, "loss": 0.5093, "step": 1856, "task_loss": 0.08972012996673584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4579392671585083, "epoch": 1.57, "learning_rate": 4.683478914248145e-05, "loss": 0.5803, "step": 1857, "task_loss": 0.3766317069530487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5779644846916199, "epoch": 1.57, "learning_rate": 4.683009298393914e-05, "loss": 0.493, "step": 1858, "task_loss": 0.5334076881408691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8530150055885315, "epoch": 1.57, "learning_rate": 4.682539682539683e-05, "loss": 0.7654, "step": 1859, "task_loss": 1.7978960275650024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7706997394561768, "epoch": 1.57, "learning_rate": 4.6820700666854515e-05, "loss": 0.7488, "step": 1860, "task_loss": 1.7747673988342285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.333295613527298, "epoch": 1.57, "learning_rate": 4.681600450831221e-05, "loss": 0.4435, "step": 1861, "task_loss": 0.2296253740787506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5368047952651978, "epoch": 1.57, "learning_rate": 4.681130834976989e-05, "loss": 0.5067, "step": 1862, "task_loss": 0.759662926197052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4698890149593353, "epoch": 1.57, "learning_rate": 4.680661219122758e-05, "loss": 0.5027, "step": 1863, "task_loss": 0.2978939712047577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4717600643634796, "epoch": 1.58, "learning_rate": 4.680191603268527e-05, "loss": 0.5042, "step": 1864, "task_loss": 1.1161022186279297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5680921673774719, "epoch": 1.58, "learning_rate": 4.6797219874142953e-05, "loss": 0.7499, "step": 1865, "task_loss": 1.5024515390396118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44387954473495483, "epoch": 1.58, "learning_rate": 4.679252371560064e-05, "loss": 0.5682, "step": 1866, "task_loss": 0.3542553782463074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.465790718793869, "epoch": 1.58, "learning_rate": 4.6787827557058326e-05, "loss": 0.6977, "step": 1867, "task_loss": 0.8902647495269775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42614203691482544, "epoch": 1.58, "learning_rate": 4.678313139851602e-05, "loss": 0.5771, "step": 1868, "task_loss": 0.47765904664993286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32773590087890625, "epoch": 1.58, "learning_rate": 4.6778435239973706e-05, "loss": 0.6341, "step": 1869, "task_loss": 0.9472421407699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5432841777801514, "epoch": 1.58, "learning_rate": 4.677373908143139e-05, "loss": 0.5389, "step": 1870, "task_loss": 0.30233752727508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8167489171028137, "epoch": 1.58, "learning_rate": 4.676904292288908e-05, "loss": 0.5751, "step": 1871, "task_loss": 1.3057947158813477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5769727826118469, "epoch": 1.58, "learning_rate": 4.6764346764346765e-05, "loss": 0.6976, "step": 1872, "task_loss": 0.4379667043685913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49119219183921814, "epoch": 1.58, "learning_rate": 4.675965060580446e-05, "loss": 0.4706, "step": 1873, "task_loss": 0.7343567609786987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3757442831993103, "epoch": 1.58, "learning_rate": 4.675495444726214e-05, "loss": 0.5302, "step": 1874, "task_loss": 0.7025794982910156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7963919639587402, "epoch": 1.58, "learning_rate": 4.675025828871983e-05, "loss": 0.5314, "step": 1875, "task_loss": 0.704535722732544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32425594329833984, "epoch": 1.59, "learning_rate": 4.674556213017752e-05, "loss": 0.5072, "step": 1876, "task_loss": 0.2208862155675888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33119064569473267, "epoch": 1.59, "learning_rate": 4.67408659716352e-05, "loss": 0.5069, "step": 1877, "task_loss": 0.351622998714447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3916580379009247, "epoch": 1.59, "learning_rate": 4.6736169813092896e-05, "loss": 0.6275, "step": 1878, "task_loss": 0.37876659631729126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6204522848129272, "epoch": 1.59, "learning_rate": 4.6731473654550576e-05, "loss": 0.5546, "step": 1879, "task_loss": 0.5438115000724792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7697974443435669, "epoch": 1.59, "learning_rate": 4.672677749600827e-05, "loss": 0.6121, "step": 1880, "task_loss": 0.9159539937973022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5785267949104309, "epoch": 1.59, "learning_rate": 4.6722081337465955e-05, "loss": 0.5106, "step": 1881, "task_loss": 0.8526297211647034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5211663246154785, "epoch": 1.59, "learning_rate": 4.671738517892365e-05, "loss": 0.5627, "step": 1882, "task_loss": 0.1651502400636673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3880019187927246, "epoch": 1.59, "learning_rate": 4.671268902038133e-05, "loss": 0.497, "step": 1883, "task_loss": 0.7720074653625488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4831169843673706, "epoch": 1.59, "learning_rate": 4.6707992861839014e-05, "loss": 0.6103, "step": 1884, "task_loss": 0.6218484044075012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2888110280036926, "epoch": 1.59, "learning_rate": 4.670329670329671e-05, "loss": 0.5842, "step": 1885, "task_loss": 0.6646854281425476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27837032079696655, "epoch": 1.59, "learning_rate": 4.6698600544754394e-05, "loss": 0.6027, "step": 1886, "task_loss": 0.45071932673454285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5891077518463135, "epoch": 1.59, "learning_rate": 4.669390438621208e-05, "loss": 0.5283, "step": 1887, "task_loss": 0.8438980579376221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7927333116531372, "epoch": 1.6, "learning_rate": 4.6689208227669766e-05, "loss": 0.6143, "step": 1888, "task_loss": 1.4190785884857178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6865806579589844, "epoch": 1.6, "learning_rate": 4.668451206912746e-05, "loss": 0.6325, "step": 1889, "task_loss": 0.7962665557861328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5363432168960571, "epoch": 1.6, "learning_rate": 4.6679815910585146e-05, "loss": 0.8227, "step": 1890, "task_loss": 1.798419713973999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7652624845504761, "epoch": 1.6, "learning_rate": 4.6675119752042825e-05, "loss": 0.5887, "step": 1891, "task_loss": 1.0288584232330322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5784634351730347, "epoch": 1.6, "learning_rate": 4.667042359350052e-05, "loss": 0.4328, "step": 1892, "task_loss": 0.34006738662719727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6674325466156006, "epoch": 1.6, "learning_rate": 4.6665727434958205e-05, "loss": 0.6813, "step": 1893, "task_loss": 1.6065599918365479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47783058881759644, "epoch": 1.6, "learning_rate": 4.66610312764159e-05, "loss": 0.5931, "step": 1894, "task_loss": 0.20857645571231842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5658266544342041, "epoch": 1.6, "learning_rate": 4.6656335117873584e-05, "loss": 0.5214, "step": 1895, "task_loss": 1.3436154127120972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4773738384246826, "epoch": 1.6, "learning_rate": 4.665163895933127e-05, "loss": 0.5527, "step": 1896, "task_loss": 1.1610950231552124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6483334898948669, "epoch": 1.6, "learning_rate": 4.664694280078896e-05, "loss": 0.5936, "step": 1897, "task_loss": 1.0922473669052124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2957175374031067, "epoch": 1.6, "learning_rate": 4.664224664224664e-05, "loss": 0.4892, "step": 1898, "task_loss": 0.4227175712585449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34253326058387756, "epoch": 1.6, "learning_rate": 4.6637550483704336e-05, "loss": 0.5199, "step": 1899, "task_loss": 0.5648970603942871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6749915480613708, "epoch": 1.61, "learning_rate": 4.6632854325162016e-05, "loss": 0.4627, "step": 1900, "task_loss": 0.45267102122306824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5684672594070435, "epoch": 1.61, "learning_rate": 4.662815816661971e-05, "loss": 0.6276, "step": 1901, "task_loss": 0.8845635056495667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3277769088745117, "epoch": 1.61, "learning_rate": 4.6623462008077395e-05, "loss": 0.4532, "step": 1902, "task_loss": 0.20672525465488434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3849254846572876, "epoch": 1.61, "learning_rate": 4.661876584953508e-05, "loss": 0.6368, "step": 1903, "task_loss": 0.5823482871055603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4199620187282562, "epoch": 1.61, "learning_rate": 4.661406969099277e-05, "loss": 0.4973, "step": 1904, "task_loss": 0.7264372110366821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3507782220840454, "epoch": 1.61, "learning_rate": 4.6609373532450454e-05, "loss": 0.5347, "step": 1905, "task_loss": 0.833257794380188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.722603440284729, "epoch": 1.61, "learning_rate": 4.660467737390815e-05, "loss": 0.5188, "step": 1906, "task_loss": 1.2134987115859985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8592895269393921, "epoch": 1.61, "learning_rate": 4.6599981215365834e-05, "loss": 0.6762, "step": 1907, "task_loss": 0.969892680644989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.65284264087677, "epoch": 1.61, "learning_rate": 4.659528505682352e-05, "loss": 0.6401, "step": 1908, "task_loss": 0.7907371520996094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6005685925483704, "epoch": 1.61, "learning_rate": 4.6590588898281207e-05, "loss": 0.7769, "step": 1909, "task_loss": 0.32758018374443054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4204409122467041, "epoch": 1.61, "learning_rate": 4.658589273973889e-05, "loss": 0.509, "step": 1910, "task_loss": 1.0150146484375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5055959224700928, "epoch": 1.61, "learning_rate": 4.6581196581196586e-05, "loss": 0.4438, "step": 1911, "task_loss": 1.1137009859085083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.583311915397644, "epoch": 1.62, "learning_rate": 4.657650042265427e-05, "loss": 0.5553, "step": 1912, "task_loss": 1.0932053327560425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4660899043083191, "epoch": 1.62, "learning_rate": 4.657180426411196e-05, "loss": 0.6377, "step": 1913, "task_loss": 0.6193029284477234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7896553874015808, "epoch": 1.62, "learning_rate": 4.6567108105569645e-05, "loss": 0.6236, "step": 1914, "task_loss": 0.6770513653755188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48534470796585083, "epoch": 1.62, "learning_rate": 4.656241194702733e-05, "loss": 0.5194, "step": 1915, "task_loss": 0.6714147329330444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3645036518573761, "epoch": 1.62, "learning_rate": 4.6557715788485025e-05, "loss": 0.5743, "step": 1916, "task_loss": 0.930041491985321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47719937562942505, "epoch": 1.62, "learning_rate": 4.6553019629942704e-05, "loss": 0.5443, "step": 1917, "task_loss": 0.5547931790351868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5332576632499695, "epoch": 1.62, "learning_rate": 4.65483234714004e-05, "loss": 0.4994, "step": 1918, "task_loss": 0.45531582832336426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6089877486228943, "epoch": 1.62, "learning_rate": 4.6543627312858084e-05, "loss": 0.5749, "step": 1919, "task_loss": 0.6478610634803772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6481602787971497, "epoch": 1.62, "learning_rate": 4.653893115431578e-05, "loss": 0.627, "step": 1920, "task_loss": 0.8437278866767883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6896153688430786, "epoch": 1.62, "learning_rate": 4.6534234995773456e-05, "loss": 0.5645, "step": 1921, "task_loss": 1.788332223892212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5446599721908569, "epoch": 1.62, "learning_rate": 4.652953883723114e-05, "loss": 0.5084, "step": 1922, "task_loss": 0.7825576663017273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3158700168132782, "epoch": 1.63, "learning_rate": 4.6524842678688836e-05, "loss": 0.5455, "step": 1923, "task_loss": 0.15550537407398224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4869685769081116, "epoch": 1.63, "learning_rate": 4.652014652014652e-05, "loss": 0.4293, "step": 1924, "task_loss": 0.750139594078064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5200086832046509, "epoch": 1.63, "learning_rate": 4.6515450361604215e-05, "loss": 0.586, "step": 1925, "task_loss": 1.3922884464263916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46994614601135254, "epoch": 1.63, "learning_rate": 4.6510754203061895e-05, "loss": 0.5631, "step": 1926, "task_loss": 0.8143450021743774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28100305795669556, "epoch": 1.63, "learning_rate": 4.650605804451959e-05, "loss": 0.4496, "step": 1927, "task_loss": 0.43802034854888916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.455371618270874, "epoch": 1.63, "learning_rate": 4.6501361885977274e-05, "loss": 0.5932, "step": 1928, "task_loss": 1.5162861347198486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.620887279510498, "epoch": 1.63, "learning_rate": 4.649666572743496e-05, "loss": 0.5614, "step": 1929, "task_loss": 0.3991503119468689 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5860304832458496, "epoch": 1.63, "learning_rate": 4.649196956889265e-05, "loss": 0.6906, "step": 1930, "task_loss": 0.6031410694122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5725762844085693, "epoch": 1.63, "learning_rate": 4.648727341035033e-05, "loss": 0.6038, "step": 1931, "task_loss": 1.9275588989257812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6185287833213806, "epoch": 1.63, "learning_rate": 4.6482577251808026e-05, "loss": 0.6394, "step": 1932, "task_loss": 0.5178365707397461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6467101573944092, "epoch": 1.63, "learning_rate": 4.647788109326571e-05, "loss": 0.5769, "step": 1933, "task_loss": 1.0293409824371338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2547793984413147, "epoch": 1.63, "learning_rate": 4.64731849347234e-05, "loss": 0.5055, "step": 1934, "task_loss": 0.07038739323616028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44193142652511597, "epoch": 1.64, "learning_rate": 4.6468488776181085e-05, "loss": 0.5663, "step": 1935, "task_loss": 0.9876991510391235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7976207137107849, "epoch": 1.64, "learning_rate": 4.646379261763877e-05, "loss": 0.6006, "step": 1936, "task_loss": 0.5815891027450562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6197544932365417, "epoch": 1.64, "learning_rate": 4.6459096459096465e-05, "loss": 0.5652, "step": 1937, "task_loss": 0.4888664186000824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39842820167541504, "epoch": 1.64, "learning_rate": 4.645440030055415e-05, "loss": 0.5173, "step": 1938, "task_loss": 0.670062243938446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7301890850067139, "epoch": 1.64, "learning_rate": 4.644970414201184e-05, "loss": 0.5976, "step": 1939, "task_loss": 1.3496747016906738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3726184070110321, "epoch": 1.64, "learning_rate": 4.6445007983469524e-05, "loss": 0.4341, "step": 1940, "task_loss": 0.5825750231742859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4145577847957611, "epoch": 1.64, "learning_rate": 4.644031182492721e-05, "loss": 0.4319, "step": 1941, "task_loss": 0.7643408179283142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35096317529678345, "epoch": 1.64, "learning_rate": 4.64356156663849e-05, "loss": 0.556, "step": 1942, "task_loss": 0.6043316125869751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4194242060184479, "epoch": 1.64, "learning_rate": 4.643091950784258e-05, "loss": 0.6013, "step": 1943, "task_loss": 0.7272207736968994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3347339332103729, "epoch": 1.64, "learning_rate": 4.6426223349300276e-05, "loss": 0.4598, "step": 1944, "task_loss": 0.7678591012954712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4394766092300415, "epoch": 1.64, "learning_rate": 4.642152719075796e-05, "loss": 0.6409, "step": 1945, "task_loss": 0.4446955919265747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5237839221954346, "epoch": 1.64, "learning_rate": 4.641683103221565e-05, "loss": 0.608, "step": 1946, "task_loss": 1.3165702819824219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3522550165653229, "epoch": 1.65, "learning_rate": 4.6412134873673335e-05, "loss": 0.4813, "step": 1947, "task_loss": 0.5665919184684753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7713543772697449, "epoch": 1.65, "learning_rate": 4.640743871513102e-05, "loss": 0.7532, "step": 1948, "task_loss": 1.149672031402588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6303111910820007, "epoch": 1.65, "learning_rate": 4.6402742556588714e-05, "loss": 0.6287, "step": 1949, "task_loss": 1.1541316509246826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4204871654510498, "epoch": 1.65, "learning_rate": 4.63980463980464e-05, "loss": 0.5536, "step": 1950, "task_loss": 0.683290958404541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45184242725372314, "epoch": 1.65, "learning_rate": 4.639335023950409e-05, "loss": 0.5491, "step": 1951, "task_loss": 1.282523512840271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42054784297943115, "epoch": 1.65, "learning_rate": 4.6388654080961773e-05, "loss": 0.5575, "step": 1952, "task_loss": 0.3799220025539398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6612525582313538, "epoch": 1.65, "learning_rate": 4.6383957922419467e-05, "loss": 0.6076, "step": 1953, "task_loss": 0.41429269313812256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6542116403579712, "epoch": 1.65, "learning_rate": 4.637926176387715e-05, "loss": 0.7229, "step": 1954, "task_loss": 0.6651398539543152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5006612539291382, "epoch": 1.65, "learning_rate": 4.637456560533484e-05, "loss": 0.6947, "step": 1955, "task_loss": 1.1749610900878906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.473822683095932, "epoch": 1.65, "learning_rate": 4.6369869446792526e-05, "loss": 0.495, "step": 1956, "task_loss": 0.26185446977615356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6011534929275513, "epoch": 1.65, "learning_rate": 4.636517328825021e-05, "loss": 0.6001, "step": 1957, "task_loss": 0.644119381904602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6259428858757019, "epoch": 1.65, "learning_rate": 4.6360477129707905e-05, "loss": 0.6279, "step": 1958, "task_loss": 0.6063622832298279 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42479443550109863, "epoch": 1.66, "learning_rate": 4.635578097116559e-05, "loss": 0.5178, "step": 1959, "task_loss": 0.5777928233146667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6681352853775024, "epoch": 1.66, "learning_rate": 4.635108481262328e-05, "loss": 0.6557, "step": 1960, "task_loss": 0.3438609540462494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4911631941795349, "epoch": 1.66, "learning_rate": 4.6346388654080964e-05, "loss": 0.4129, "step": 1961, "task_loss": 0.6124438047409058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37149778008461, "epoch": 1.66, "learning_rate": 4.634169249553865e-05, "loss": 0.479, "step": 1962, "task_loss": 0.055624134838581085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.569645881652832, "epoch": 1.66, "learning_rate": 4.6336996336996343e-05, "loss": 0.438, "step": 1963, "task_loss": 0.08823998272418976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48873698711395264, "epoch": 1.66, "learning_rate": 4.633230017845402e-05, "loss": 0.5591, "step": 1964, "task_loss": 0.8414707779884338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5271422863006592, "epoch": 1.66, "learning_rate": 4.6327604019911716e-05, "loss": 0.4225, "step": 1965, "task_loss": 0.7537546753883362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5533090829849243, "epoch": 1.66, "learning_rate": 4.63229078613694e-05, "loss": 0.4771, "step": 1966, "task_loss": 0.8737282156944275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6077708005905151, "epoch": 1.66, "learning_rate": 4.631821170282709e-05, "loss": 0.5267, "step": 1967, "task_loss": 1.0152860879898071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4342237710952759, "epoch": 1.66, "learning_rate": 4.6313515544284775e-05, "loss": 0.444, "step": 1968, "task_loss": 0.7713616490364075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5172666311264038, "epoch": 1.66, "learning_rate": 4.630881938574246e-05, "loss": 0.4356, "step": 1969, "task_loss": 0.37704595923423767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5508687496185303, "epoch": 1.66, "learning_rate": 4.6304123227200155e-05, "loss": 0.6172, "step": 1970, "task_loss": 0.4439537525177002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31358426809310913, "epoch": 1.67, "learning_rate": 4.629942706865784e-05, "loss": 0.5467, "step": 1971, "task_loss": 1.3943533897399902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41853898763656616, "epoch": 1.67, "learning_rate": 4.629473091011553e-05, "loss": 0.5419, "step": 1972, "task_loss": 0.8736618161201477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5700383186340332, "epoch": 1.67, "learning_rate": 4.6290034751573214e-05, "loss": 0.5451, "step": 1973, "task_loss": 0.8704242706298828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.686416745185852, "epoch": 1.67, "learning_rate": 4.62853385930309e-05, "loss": 0.5794, "step": 1974, "task_loss": 1.0310028791427612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.783177375793457, "epoch": 1.67, "learning_rate": 4.628064243448859e-05, "loss": 0.4781, "step": 1975, "task_loss": 0.8976475596427917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5620219707489014, "epoch": 1.67, "learning_rate": 4.627594627594628e-05, "loss": 0.6571, "step": 1976, "task_loss": 1.2086716890335083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46269556879997253, "epoch": 1.67, "learning_rate": 4.6271250117403966e-05, "loss": 0.6142, "step": 1977, "task_loss": 1.2061903476715088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32798540592193604, "epoch": 1.67, "learning_rate": 4.626655395886165e-05, "loss": 0.5706, "step": 1978, "task_loss": 0.9321848750114441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5476683974266052, "epoch": 1.67, "learning_rate": 4.626185780031934e-05, "loss": 0.5043, "step": 1979, "task_loss": 1.3027472496032715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42937058210372925, "epoch": 1.67, "learning_rate": 4.625716164177703e-05, "loss": 0.4753, "step": 1980, "task_loss": 0.32883381843566895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5658262968063354, "epoch": 1.67, "learning_rate": 4.625246548323471e-05, "loss": 0.5551, "step": 1981, "task_loss": 0.6692588329315186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45838427543640137, "epoch": 1.67, "learning_rate": 4.6247769324692404e-05, "loss": 0.5026, "step": 1982, "task_loss": 1.0062143802642822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.316168874502182, "epoch": 1.68, "learning_rate": 4.624307316615009e-05, "loss": 0.4839, "step": 1983, "task_loss": 0.5396662950515747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6459980607032776, "epoch": 1.68, "learning_rate": 4.6238377007607784e-05, "loss": 0.583, "step": 1984, "task_loss": 0.6074379086494446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5549649596214294, "epoch": 1.68, "learning_rate": 4.623368084906547e-05, "loss": 0.6475, "step": 1985, "task_loss": 0.5649117231369019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31864678859710693, "epoch": 1.68, "learning_rate": 4.622898469052315e-05, "loss": 0.5525, "step": 1986, "task_loss": 0.661083459854126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6135776042938232, "epoch": 1.68, "learning_rate": 4.622428853198084e-05, "loss": 0.6484, "step": 1987, "task_loss": 1.7059043645858765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5517882704734802, "epoch": 1.68, "learning_rate": 4.621959237343853e-05, "loss": 0.6385, "step": 1988, "task_loss": 1.5278565883636475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6723511219024658, "epoch": 1.68, "learning_rate": 4.621489621489622e-05, "loss": 0.5433, "step": 1989, "task_loss": 0.5345288515090942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.482448011636734, "epoch": 1.68, "learning_rate": 4.62102000563539e-05, "loss": 0.5286, "step": 1990, "task_loss": 0.8080672025680542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23152707517147064, "epoch": 1.68, "learning_rate": 4.6205503897811595e-05, "loss": 0.3938, "step": 1991, "task_loss": 0.5284003615379333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.354184627532959, "epoch": 1.68, "learning_rate": 4.620080773926928e-05, "loss": 0.4614, "step": 1992, "task_loss": 0.4074113368988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2866057753562927, "epoch": 1.68, "learning_rate": 4.619611158072697e-05, "loss": 0.4559, "step": 1993, "task_loss": 0.7265781760215759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5358465909957886, "epoch": 1.69, "learning_rate": 4.6191415422184654e-05, "loss": 0.5935, "step": 1994, "task_loss": 1.2999171018600464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43576326966285706, "epoch": 1.69, "learning_rate": 4.618671926364234e-05, "loss": 0.5004, "step": 1995, "task_loss": 0.13083557784557343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4741824269294739, "epoch": 1.69, "learning_rate": 4.618202310510003e-05, "loss": 0.5342, "step": 1996, "task_loss": 0.6869692802429199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4911336898803711, "epoch": 1.69, "learning_rate": 4.617732694655772e-05, "loss": 0.6062, "step": 1997, "task_loss": 0.6886183619499207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8936190605163574, "epoch": 1.69, "learning_rate": 4.6172630788015406e-05, "loss": 0.6436, "step": 1998, "task_loss": 1.1337511539459229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3329553008079529, "epoch": 1.69, "learning_rate": 4.616793462947309e-05, "loss": 0.4169, "step": 1999, "task_loss": 0.3013676404953003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40093374252319336, "epoch": 1.69, "learning_rate": 4.616323847093078e-05, "loss": 0.4822, "step": 2000, "task_loss": 0.20425766706466675 }, { "epoch": 1.69, "eval_accuracy": 0.9043168316831683, "eval_loss": 0.3340230882167816, "eval_runtime": 225.7911, "eval_samples_per_second": 111.829, "eval_steps_per_second": 0.877, "step": 2000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6056751608848572, "epoch": 1.69, "learning_rate": 4.615854231238847e-05, "loss": 0.4455, "step": 2001, "task_loss": 0.1757025122642517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5189034342765808, "epoch": 1.69, "learning_rate": 4.615384615384616e-05, "loss": 0.5148, "step": 2002, "task_loss": 0.2649937570095062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5306774973869324, "epoch": 1.69, "learning_rate": 4.6149149995303844e-05, "loss": 0.6267, "step": 2003, "task_loss": 0.7969727516174316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5592530965805054, "epoch": 1.69, "learning_rate": 4.614445383676153e-05, "loss": 0.4706, "step": 2004, "task_loss": 0.6228448152542114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8390980958938599, "epoch": 1.69, "learning_rate": 4.613975767821922e-05, "loss": 0.6654, "step": 2005, "task_loss": 0.5279920101165771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.502899706363678, "epoch": 1.7, "learning_rate": 4.613506151967691e-05, "loss": 0.7278, "step": 2006, "task_loss": 2.0717241764068604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42792630195617676, "epoch": 1.7, "learning_rate": 4.613036536113459e-05, "loss": 0.5112, "step": 2007, "task_loss": 0.06625665724277496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0002002716064453, "epoch": 1.7, "learning_rate": 4.612566920259228e-05, "loss": 0.6323, "step": 2008, "task_loss": 1.7138714790344238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41244804859161377, "epoch": 1.7, "learning_rate": 4.612097304404997e-05, "loss": 0.4658, "step": 2009, "task_loss": 0.23939073085784912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36650967597961426, "epoch": 1.7, "learning_rate": 4.6116276885507656e-05, "loss": 0.6581, "step": 2010, "task_loss": 1.0833652019500732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.597208559513092, "epoch": 1.7, "learning_rate": 4.611158072696534e-05, "loss": 0.6388, "step": 2011, "task_loss": 0.6391193866729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4735865592956543, "epoch": 1.7, "learning_rate": 4.610688456842303e-05, "loss": 0.409, "step": 2012, "task_loss": 0.6923089623451233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4608466327190399, "epoch": 1.7, "learning_rate": 4.610218840988072e-05, "loss": 0.4577, "step": 2013, "task_loss": 1.1356209516525269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43071508407592773, "epoch": 1.7, "learning_rate": 4.609749225133841e-05, "loss": 0.4624, "step": 2014, "task_loss": 0.36064571142196655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.556703507900238, "epoch": 1.7, "learning_rate": 4.6092796092796094e-05, "loss": 0.5577, "step": 2015, "task_loss": 0.4157209098339081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5917690992355347, "epoch": 1.7, "learning_rate": 4.608809993425378e-05, "loss": 0.5547, "step": 2016, "task_loss": 0.8628435134887695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5926486253738403, "epoch": 1.7, "learning_rate": 4.608340377571147e-05, "loss": 0.6773, "step": 2017, "task_loss": 2.050250291824341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6237493753433228, "epoch": 1.71, "learning_rate": 4.607870761716916e-05, "loss": 0.4444, "step": 2018, "task_loss": 0.8628509044647217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5371727347373962, "epoch": 1.71, "learning_rate": 4.6074011458626846e-05, "loss": 0.5431, "step": 2019, "task_loss": 0.7547898292541504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5828044414520264, "epoch": 1.71, "learning_rate": 4.606931530008453e-05, "loss": 0.5806, "step": 2020, "task_loss": 1.0396511554718018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5458331108093262, "epoch": 1.71, "learning_rate": 4.606461914154222e-05, "loss": 0.6068, "step": 2021, "task_loss": 0.6117076873779297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7212594747543335, "epoch": 1.71, "learning_rate": 4.605992298299991e-05, "loss": 0.5277, "step": 2022, "task_loss": 0.30551138520240784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5757885575294495, "epoch": 1.71, "learning_rate": 4.60552268244576e-05, "loss": 0.4628, "step": 2023, "task_loss": 2.740645408630371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5750226378440857, "epoch": 1.71, "learning_rate": 4.605053066591528e-05, "loss": 0.7249, "step": 2024, "task_loss": 0.7301718592643738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5468335747718811, "epoch": 1.71, "learning_rate": 4.604583450737297e-05, "loss": 0.7929, "step": 2025, "task_loss": 1.4978584051132202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3424018919467926, "epoch": 1.71, "learning_rate": 4.604113834883066e-05, "loss": 0.4583, "step": 2026, "task_loss": 0.6018313765525818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5119558572769165, "epoch": 1.71, "learning_rate": 4.603644219028835e-05, "loss": 0.5456, "step": 2027, "task_loss": 1.2052727937698364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3957095146179199, "epoch": 1.71, "learning_rate": 4.603174603174603e-05, "loss": 0.49, "step": 2028, "task_loss": 0.35815364122390747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4523797333240509, "epoch": 1.71, "learning_rate": 4.602704987320372e-05, "loss": 0.5428, "step": 2029, "task_loss": 0.4046899378299713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3696421682834625, "epoch": 1.72, "learning_rate": 4.602235371466141e-05, "loss": 0.5899, "step": 2030, "task_loss": 0.7279051542282104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4815080761909485, "epoch": 1.72, "learning_rate": 4.6017657556119096e-05, "loss": 0.5272, "step": 2031, "task_loss": 0.49486130475997925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40400150418281555, "epoch": 1.72, "learning_rate": 4.601296139757679e-05, "loss": 0.4161, "step": 2032, "task_loss": 0.1654321253299713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40661752223968506, "epoch": 1.72, "learning_rate": 4.600826523903447e-05, "loss": 0.4379, "step": 2033, "task_loss": 1.4734612703323364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46199682354927063, "epoch": 1.72, "learning_rate": 4.600356908049216e-05, "loss": 0.512, "step": 2034, "task_loss": 0.9092661142349243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5806515216827393, "epoch": 1.72, "learning_rate": 4.599887292194985e-05, "loss": 0.588, "step": 2035, "task_loss": 0.5865480899810791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5678511261940002, "epoch": 1.72, "learning_rate": 4.5994176763407534e-05, "loss": 0.5869, "step": 2036, "task_loss": 1.0105096101760864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6642439365386963, "epoch": 1.72, "learning_rate": 4.598948060486522e-05, "loss": 0.563, "step": 2037, "task_loss": 1.2371654510498047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7536219358444214, "epoch": 1.72, "learning_rate": 4.598478444632291e-05, "loss": 0.6219, "step": 2038, "task_loss": 1.0479451417922974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40475180745124817, "epoch": 1.72, "learning_rate": 4.59800882877806e-05, "loss": 0.5855, "step": 2039, "task_loss": 0.4585319757461548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6237762570381165, "epoch": 1.72, "learning_rate": 4.5975392129238286e-05, "loss": 0.5126, "step": 2040, "task_loss": 1.0119225978851318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5477268695831299, "epoch": 1.72, "learning_rate": 4.597069597069597e-05, "loss": 0.5777, "step": 2041, "task_loss": 0.5736196637153625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6207205057144165, "epoch": 1.73, "learning_rate": 4.596599981215366e-05, "loss": 0.6074, "step": 2042, "task_loss": 0.5363821387290955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49455398321151733, "epoch": 1.73, "learning_rate": 4.5961303653611346e-05, "loss": 0.4257, "step": 2043, "task_loss": 0.9934645891189575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5333952307701111, "epoch": 1.73, "learning_rate": 4.595660749506904e-05, "loss": 0.6664, "step": 2044, "task_loss": 0.7820777893066406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39221054315567017, "epoch": 1.73, "learning_rate": 4.595191133652672e-05, "loss": 0.5043, "step": 2045, "task_loss": 1.2235773801803589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23025313019752502, "epoch": 1.73, "learning_rate": 4.594721517798441e-05, "loss": 0.5182, "step": 2046, "task_loss": 0.25560957193374634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3526519536972046, "epoch": 1.73, "learning_rate": 4.59425190194421e-05, "loss": 0.6246, "step": 2047, "task_loss": 1.1382291316986084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40422725677490234, "epoch": 1.73, "learning_rate": 4.593782286089979e-05, "loss": 0.4954, "step": 2048, "task_loss": 0.4588763117790222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6192618608474731, "epoch": 1.73, "learning_rate": 4.593312670235748e-05, "loss": 0.5411, "step": 2049, "task_loss": 1.1786308288574219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6077077984809875, "epoch": 1.73, "learning_rate": 4.592843054381516e-05, "loss": 0.7234, "step": 2050, "task_loss": 0.49861037731170654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.501878559589386, "epoch": 1.73, "learning_rate": 4.592373438527285e-05, "loss": 0.5473, "step": 2051, "task_loss": 0.35508450865745544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4724075496196747, "epoch": 1.73, "learning_rate": 4.5919038226730536e-05, "loss": 0.6414, "step": 2052, "task_loss": 0.7870118021965027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39327967166900635, "epoch": 1.73, "learning_rate": 4.591434206818823e-05, "loss": 0.3853, "step": 2053, "task_loss": 0.5937086939811707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3592796325683594, "epoch": 1.74, "learning_rate": 4.590964590964591e-05, "loss": 0.5272, "step": 2054, "task_loss": 0.17681090533733368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6329157948493958, "epoch": 1.74, "learning_rate": 4.59049497511036e-05, "loss": 0.4732, "step": 2055, "task_loss": 1.2758135795593262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7073512673377991, "epoch": 1.74, "learning_rate": 4.590025359256129e-05, "loss": 0.6059, "step": 2056, "task_loss": 0.7978444695472717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4669090807437897, "epoch": 1.74, "learning_rate": 4.5895557434018975e-05, "loss": 0.5368, "step": 2057, "task_loss": 1.1333445310592651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4513433873653412, "epoch": 1.74, "learning_rate": 4.589086127547666e-05, "loss": 0.4598, "step": 2058, "task_loss": 0.7902257442474365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5717073678970337, "epoch": 1.74, "learning_rate": 4.588616511693435e-05, "loss": 0.515, "step": 2059, "task_loss": 1.551400065422058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31662800908088684, "epoch": 1.74, "learning_rate": 4.588146895839204e-05, "loss": 0.5206, "step": 2060, "task_loss": 0.05908510461449623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22665706276893616, "epoch": 1.74, "learning_rate": 4.587677279984973e-05, "loss": 0.5632, "step": 2061, "task_loss": 0.09973838180303574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.577019214630127, "epoch": 1.74, "learning_rate": 4.587207664130741e-05, "loss": 0.5171, "step": 2062, "task_loss": 0.9770259857177734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5959432125091553, "epoch": 1.74, "learning_rate": 4.58673804827651e-05, "loss": 0.4201, "step": 2063, "task_loss": 0.04249989241361618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.394430011510849, "epoch": 1.74, "learning_rate": 4.5862684324222786e-05, "loss": 0.4128, "step": 2064, "task_loss": 1.064234733581543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5937154293060303, "epoch": 1.75, "learning_rate": 4.585798816568048e-05, "loss": 0.5038, "step": 2065, "task_loss": 0.5921313166618347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.512501060962677, "epoch": 1.75, "learning_rate": 4.5853292007138165e-05, "loss": 0.6374, "step": 2066, "task_loss": 0.5569763779640198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3904300928115845, "epoch": 1.75, "learning_rate": 4.584859584859585e-05, "loss": 0.5755, "step": 2067, "task_loss": 0.8895431160926819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0078972578048706, "epoch": 1.75, "learning_rate": 4.584389969005354e-05, "loss": 0.6333, "step": 2068, "task_loss": 0.9554455280303955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38069719076156616, "epoch": 1.75, "learning_rate": 4.5839203531511224e-05, "loss": 0.5563, "step": 2069, "task_loss": 0.870195209980011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5745996236801147, "epoch": 1.75, "learning_rate": 4.583450737296892e-05, "loss": 0.5973, "step": 2070, "task_loss": 0.05846540629863739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3373928964138031, "epoch": 1.75, "learning_rate": 4.58298112144266e-05, "loss": 0.4311, "step": 2071, "task_loss": 0.12259690463542938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41030409932136536, "epoch": 1.75, "learning_rate": 4.582511505588429e-05, "loss": 0.6165, "step": 2072, "task_loss": 0.08712349832057953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48414942622184753, "epoch": 1.75, "learning_rate": 4.5820418897341976e-05, "loss": 0.67, "step": 2073, "task_loss": 0.3630458414554596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5082026720046997, "epoch": 1.75, "learning_rate": 4.581572273879966e-05, "loss": 0.4691, "step": 2074, "task_loss": 0.5409849882125854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4976435601711273, "epoch": 1.75, "learning_rate": 4.581102658025735e-05, "loss": 0.5331, "step": 2075, "task_loss": 0.9141504764556885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5039362907409668, "epoch": 1.75, "learning_rate": 4.5806330421715035e-05, "loss": 0.4497, "step": 2076, "task_loss": 0.33117565512657166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5112289190292358, "epoch": 1.76, "learning_rate": 4.580163426317273e-05, "loss": 0.4831, "step": 2077, "task_loss": 0.42792055010795593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47932836413383484, "epoch": 1.76, "learning_rate": 4.5796938104630415e-05, "loss": 0.5191, "step": 2078, "task_loss": 0.5953351259231567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37775322794914246, "epoch": 1.76, "learning_rate": 4.579224194608811e-05, "loss": 0.6446, "step": 2079, "task_loss": 0.18047462403774261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3844433128833771, "epoch": 1.76, "learning_rate": 4.578754578754579e-05, "loss": 0.5781, "step": 2080, "task_loss": 0.7675375938415527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6697014570236206, "epoch": 1.76, "learning_rate": 4.5782849629003474e-05, "loss": 0.692, "step": 2081, "task_loss": 1.051684856414795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6656203269958496, "epoch": 1.76, "learning_rate": 4.577815347046117e-05, "loss": 0.6218, "step": 2082, "task_loss": 0.6539474725723267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6079275608062744, "epoch": 1.76, "learning_rate": 4.577345731191885e-05, "loss": 0.4963, "step": 2083, "task_loss": 2.5518834590911865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30683356523513794, "epoch": 1.76, "learning_rate": 4.576876115337654e-05, "loss": 0.4201, "step": 2084, "task_loss": 0.08785250782966614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5060566067695618, "epoch": 1.76, "learning_rate": 4.5764064994834226e-05, "loss": 0.5084, "step": 2085, "task_loss": 2.1791818141937256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5173379778862, "epoch": 1.76, "learning_rate": 4.575936883629192e-05, "loss": 0.4961, "step": 2086, "task_loss": 0.5354496240615845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2263544350862503, "epoch": 1.76, "learning_rate": 4.5754672677749605e-05, "loss": 0.4305, "step": 2087, "task_loss": 0.29342469573020935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6126644015312195, "epoch": 1.76, "learning_rate": 4.5749976519207285e-05, "loss": 0.52, "step": 2088, "task_loss": 0.3567040264606476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6929805874824524, "epoch": 1.77, "learning_rate": 4.574528036066498e-05, "loss": 0.5792, "step": 2089, "task_loss": 1.4266631603240967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4380316138267517, "epoch": 1.77, "learning_rate": 4.5740584202122664e-05, "loss": 0.6447, "step": 2090, "task_loss": 1.1087794303894043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3530140519142151, "epoch": 1.77, "learning_rate": 4.573588804358036e-05, "loss": 0.6177, "step": 2091, "task_loss": 0.6734133958816528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35643619298934937, "epoch": 1.77, "learning_rate": 4.573119188503804e-05, "loss": 0.5871, "step": 2092, "task_loss": 0.7622537016868591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8660944104194641, "epoch": 1.77, "learning_rate": 4.572649572649573e-05, "loss": 0.6362, "step": 2093, "task_loss": 1.4929794073104858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5037628412246704, "epoch": 1.77, "learning_rate": 4.5721799567953417e-05, "loss": 0.6219, "step": 2094, "task_loss": 0.7741486430168152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7135202884674072, "epoch": 1.77, "learning_rate": 4.57171034094111e-05, "loss": 0.5639, "step": 2095, "task_loss": 1.14582359790802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41710248589515686, "epoch": 1.77, "learning_rate": 4.5712407250868796e-05, "loss": 0.4457, "step": 2096, "task_loss": 1.0626248121261597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5848379731178284, "epoch": 1.77, "learning_rate": 4.5707711092326476e-05, "loss": 0.6873, "step": 2097, "task_loss": 1.3410942554473877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2532356381416321, "epoch": 1.77, "learning_rate": 4.570301493378417e-05, "loss": 0.4545, "step": 2098, "task_loss": 0.8830112814903259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5295059680938721, "epoch": 1.77, "learning_rate": 4.5698318775241855e-05, "loss": 0.5924, "step": 2099, "task_loss": 0.691349446773529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32275399565696716, "epoch": 1.77, "learning_rate": 4.569362261669954e-05, "loss": 0.425, "step": 2100, "task_loss": 0.45667481422424316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4267581105232239, "epoch": 1.78, "learning_rate": 4.568892645815723e-05, "loss": 0.5115, "step": 2101, "task_loss": 0.5329892039299011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.726520299911499, "epoch": 1.78, "learning_rate": 4.5684230299614914e-05, "loss": 0.5653, "step": 2102, "task_loss": 0.9000964760780334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4278215765953064, "epoch": 1.78, "learning_rate": 4.567953414107261e-05, "loss": 0.5559, "step": 2103, "task_loss": 0.6046202778816223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3282686471939087, "epoch": 1.78, "learning_rate": 4.5674837982530294e-05, "loss": 0.4602, "step": 2104, "task_loss": 0.6944946646690369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8411785364151001, "epoch": 1.78, "learning_rate": 4.567014182398798e-05, "loss": 0.6693, "step": 2105, "task_loss": 0.8372368812561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5440412163734436, "epoch": 1.78, "learning_rate": 4.5665445665445666e-05, "loss": 0.5521, "step": 2106, "task_loss": 1.2641083002090454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7302548885345459, "epoch": 1.78, "learning_rate": 4.566074950690335e-05, "loss": 0.7717, "step": 2107, "task_loss": 0.9495037794113159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.694897472858429, "epoch": 1.78, "learning_rate": 4.5656053348361046e-05, "loss": 0.5654, "step": 2108, "task_loss": 0.9573599100112915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7324835062026978, "epoch": 1.78, "learning_rate": 4.565135718981873e-05, "loss": 0.64, "step": 2109, "task_loss": 1.4696638584136963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3057940900325775, "epoch": 1.78, "learning_rate": 4.564666103127642e-05, "loss": 0.5551, "step": 2110, "task_loss": 0.5381582379341125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49201852083206177, "epoch": 1.78, "learning_rate": 4.5641964872734105e-05, "loss": 0.6034, "step": 2111, "task_loss": 0.38586488366127014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5427163243293762, "epoch": 1.78, "learning_rate": 4.563726871419179e-05, "loss": 0.6808, "step": 2112, "task_loss": 0.2384076714515686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3981996774673462, "epoch": 1.79, "learning_rate": 4.5632572555649484e-05, "loss": 0.4919, "step": 2113, "task_loss": 0.12832409143447876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5524776577949524, "epoch": 1.79, "learning_rate": 4.5627876397107164e-05, "loss": 0.5455, "step": 2114, "task_loss": 0.6574081778526306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7355163097381592, "epoch": 1.79, "learning_rate": 4.562318023856486e-05, "loss": 0.6219, "step": 2115, "task_loss": 1.087763786315918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4812569320201874, "epoch": 1.79, "learning_rate": 4.561848408002254e-05, "loss": 0.5088, "step": 2116, "task_loss": 0.9188875555992126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5180171728134155, "epoch": 1.79, "learning_rate": 4.5613787921480236e-05, "loss": 0.5295, "step": 2117, "task_loss": 0.38163769245147705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6208527088165283, "epoch": 1.79, "learning_rate": 4.5609091762937916e-05, "loss": 0.4716, "step": 2118, "task_loss": 0.5878293514251709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5196279287338257, "epoch": 1.79, "learning_rate": 4.56043956043956e-05, "loss": 0.4937, "step": 2119, "task_loss": 1.462790846824646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43340086936950684, "epoch": 1.79, "learning_rate": 4.5599699445853295e-05, "loss": 0.8449, "step": 2120, "task_loss": 0.6878465414047241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49001726508140564, "epoch": 1.79, "learning_rate": 4.559500328731098e-05, "loss": 0.4514, "step": 2121, "task_loss": 1.3996986150741577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4845588207244873, "epoch": 1.79, "learning_rate": 4.559030712876867e-05, "loss": 0.4902, "step": 2122, "task_loss": 0.6727762222290039 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7658233046531677, "epoch": 1.79, "learning_rate": 4.5585610970226354e-05, "loss": 0.7135, "step": 2123, "task_loss": 1.421358585357666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5876352787017822, "epoch": 1.79, "learning_rate": 4.558091481168405e-05, "loss": 0.5955, "step": 2124, "task_loss": 1.5994890928268433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.773114025592804, "epoch": 1.8, "learning_rate": 4.5576218653141734e-05, "loss": 0.6915, "step": 2125, "task_loss": 0.8267909288406372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5202220678329468, "epoch": 1.8, "learning_rate": 4.557152249459942e-05, "loss": 0.5609, "step": 2126, "task_loss": 0.8998216986656189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5299263000488281, "epoch": 1.8, "learning_rate": 4.5566826336057106e-05, "loss": 0.5724, "step": 2127, "task_loss": 0.554583728313446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5065711736679077, "epoch": 1.8, "learning_rate": 4.556213017751479e-05, "loss": 0.5646, "step": 2128, "task_loss": 0.7100570797920227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7426444888114929, "epoch": 1.8, "learning_rate": 4.5557434018972486e-05, "loss": 0.598, "step": 2129, "task_loss": 0.29383257031440735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2409677803516388, "epoch": 1.8, "learning_rate": 4.555273786043017e-05, "loss": 0.4977, "step": 2130, "task_loss": 0.327333927154541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5605467557907104, "epoch": 1.8, "learning_rate": 4.554804170188786e-05, "loss": 0.4469, "step": 2131, "task_loss": 0.40042293071746826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46402814984321594, "epoch": 1.8, "learning_rate": 4.5543345543345545e-05, "loss": 0.4209, "step": 2132, "task_loss": 1.2897979021072388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41473808884620667, "epoch": 1.8, "learning_rate": 4.553864938480323e-05, "loss": 0.4368, "step": 2133, "task_loss": 0.8982406854629517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6944454312324524, "epoch": 1.8, "learning_rate": 4.5533953226260924e-05, "loss": 0.606, "step": 2134, "task_loss": 0.388278603553772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47800424695014954, "epoch": 1.8, "learning_rate": 4.5529257067718604e-05, "loss": 0.476, "step": 2135, "task_loss": 1.2383863925933838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2845986783504486, "epoch": 1.81, "learning_rate": 4.55245609091763e-05, "loss": 0.4018, "step": 2136, "task_loss": 0.44248855113983154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33319205045700073, "epoch": 1.81, "learning_rate": 4.5519864750633983e-05, "loss": 0.4339, "step": 2137, "task_loss": 1.3044594526290894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.84636390209198, "epoch": 1.81, "learning_rate": 4.551516859209167e-05, "loss": 0.7717, "step": 2138, "task_loss": 1.559424877166748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28313642740249634, "epoch": 1.81, "learning_rate": 4.551047243354936e-05, "loss": 0.3565, "step": 2139, "task_loss": 0.5696869492530823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6613853573799133, "epoch": 1.81, "learning_rate": 4.550577627500704e-05, "loss": 0.5762, "step": 2140, "task_loss": 0.6211257576942444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37339550256729126, "epoch": 1.81, "learning_rate": 4.5501080116464736e-05, "loss": 0.4921, "step": 2141, "task_loss": 0.0860699936747551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9557201862335205, "epoch": 1.81, "learning_rate": 4.549638395792242e-05, "loss": 0.6757, "step": 2142, "task_loss": 1.161236047744751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6511314511299133, "epoch": 1.81, "learning_rate": 4.549168779938011e-05, "loss": 0.7426, "step": 2143, "task_loss": 1.2010455131530762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6409620046615601, "epoch": 1.81, "learning_rate": 4.5486991640837795e-05, "loss": 0.5546, "step": 2144, "task_loss": 0.9285746216773987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4521445035934448, "epoch": 1.81, "learning_rate": 4.548229548229548e-05, "loss": 0.5207, "step": 2145, "task_loss": 0.4292203485965729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45739805698394775, "epoch": 1.81, "learning_rate": 4.5477599323753174e-05, "loss": 0.489, "step": 2146, "task_loss": 1.1354703903198242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39963921904563904, "epoch": 1.81, "learning_rate": 4.547290316521086e-05, "loss": 0.606, "step": 2147, "task_loss": 0.4132191240787506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.743577778339386, "epoch": 1.82, "learning_rate": 4.546820700666855e-05, "loss": 0.5648, "step": 2148, "task_loss": 0.31799301505088806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6051943302154541, "epoch": 1.82, "learning_rate": 4.546351084812623e-05, "loss": 0.5222, "step": 2149, "task_loss": 0.6442619562149048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3257400393486023, "epoch": 1.82, "learning_rate": 4.5458814689583926e-05, "loss": 0.4988, "step": 2150, "task_loss": 0.1705164909362793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5038992166519165, "epoch": 1.82, "learning_rate": 4.545411853104161e-05, "loss": 0.4818, "step": 2151, "task_loss": 0.6619661450386047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5143388509750366, "epoch": 1.82, "learning_rate": 4.544942237249929e-05, "loss": 0.5611, "step": 2152, "task_loss": 0.9020752906799316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5744906663894653, "epoch": 1.82, "learning_rate": 4.5444726213956985e-05, "loss": 0.4857, "step": 2153, "task_loss": 0.8326172232627869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6252625584602356, "epoch": 1.82, "learning_rate": 4.544003005541467e-05, "loss": 0.5781, "step": 2154, "task_loss": 1.0189082622528076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35213562846183777, "epoch": 1.82, "learning_rate": 4.5435333896872365e-05, "loss": 0.4495, "step": 2155, "task_loss": 0.5221988558769226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6378566026687622, "epoch": 1.82, "learning_rate": 4.543063773833005e-05, "loss": 0.5003, "step": 2156, "task_loss": 1.221617341041565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6205003261566162, "epoch": 1.82, "learning_rate": 4.542594157978774e-05, "loss": 0.5712, "step": 2157, "task_loss": 0.7255807518959045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37464118003845215, "epoch": 1.82, "learning_rate": 4.5421245421245424e-05, "loss": 0.5051, "step": 2158, "task_loss": 0.4357612729072571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40858331322669983, "epoch": 1.82, "learning_rate": 4.541654926270311e-05, "loss": 0.5121, "step": 2159, "task_loss": 0.7282910943031311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5164951086044312, "epoch": 1.83, "learning_rate": 4.54118531041608e-05, "loss": 0.5958, "step": 2160, "task_loss": 0.8732027411460876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2910928428173065, "epoch": 1.83, "learning_rate": 4.540715694561848e-05, "loss": 0.5391, "step": 2161, "task_loss": 0.1761472225189209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7221039533615112, "epoch": 1.83, "learning_rate": 4.5402460787076176e-05, "loss": 0.6856, "step": 2162, "task_loss": 0.6830424666404724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29133325815200806, "epoch": 1.83, "learning_rate": 4.539776462853386e-05, "loss": 0.5395, "step": 2163, "task_loss": 0.07514042407274246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5497267842292786, "epoch": 1.83, "learning_rate": 4.539306846999155e-05, "loss": 0.5386, "step": 2164, "task_loss": 0.564789891242981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5403572916984558, "epoch": 1.83, "learning_rate": 4.5388372311449235e-05, "loss": 0.4869, "step": 2165, "task_loss": 0.7605661749839783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.594100832939148, "epoch": 1.83, "learning_rate": 4.538367615290692e-05, "loss": 0.4764, "step": 2166, "task_loss": 1.2246366739273071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4309810996055603, "epoch": 1.83, "learning_rate": 4.5378979994364614e-05, "loss": 0.5521, "step": 2167, "task_loss": 0.42574670910835266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7192362546920776, "epoch": 1.83, "learning_rate": 4.53742838358223e-05, "loss": 0.655, "step": 2168, "task_loss": 0.8757136464118958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3836696147918701, "epoch": 1.83, "learning_rate": 4.536958767727999e-05, "loss": 0.6426, "step": 2169, "task_loss": 0.18924051523208618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4227776825428009, "epoch": 1.83, "learning_rate": 4.536489151873767e-05, "loss": 0.4202, "step": 2170, "task_loss": 0.8752092719078064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5961665511131287, "epoch": 1.83, "learning_rate": 4.536019536019536e-05, "loss": 0.606, "step": 2171, "task_loss": 1.334804892539978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48913857340812683, "epoch": 1.84, "learning_rate": 4.535549920165305e-05, "loss": 0.486, "step": 2172, "task_loss": 1.5451011657714844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.840120792388916, "epoch": 1.84, "learning_rate": 4.535080304311074e-05, "loss": 0.532, "step": 2173, "task_loss": 0.5846976637840271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4275282919406891, "epoch": 1.84, "learning_rate": 4.5346106884568425e-05, "loss": 0.502, "step": 2174, "task_loss": 0.7746036648750305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.249198779463768, "epoch": 1.84, "learning_rate": 4.534141072602611e-05, "loss": 0.4392, "step": 2175, "task_loss": 0.13244381546974182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44336840510368347, "epoch": 1.84, "learning_rate": 4.53367145674838e-05, "loss": 0.4917, "step": 2176, "task_loss": 1.4354397058486938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34338217973709106, "epoch": 1.84, "learning_rate": 4.533201840894149e-05, "loss": 0.4511, "step": 2177, "task_loss": 0.3205278813838959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.325478732585907, "epoch": 1.84, "learning_rate": 4.532732225039917e-05, "loss": 0.4313, "step": 2178, "task_loss": 0.7874426245689392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3221462368965149, "epoch": 1.84, "learning_rate": 4.5322626091856864e-05, "loss": 0.4714, "step": 2179, "task_loss": 0.8705538511276245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4216153919696808, "epoch": 1.84, "learning_rate": 4.531792993331455e-05, "loss": 0.4323, "step": 2180, "task_loss": 0.9092909693717957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43904808163642883, "epoch": 1.84, "learning_rate": 4.531323377477224e-05, "loss": 0.497, "step": 2181, "task_loss": 0.8948734998703003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3203968405723572, "epoch": 1.84, "learning_rate": 4.530853761622992e-05, "loss": 0.3897, "step": 2182, "task_loss": 0.3657708168029785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4434625804424286, "epoch": 1.84, "learning_rate": 4.530384145768761e-05, "loss": 0.4992, "step": 2183, "task_loss": 1.5482105016708374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48431670665740967, "epoch": 1.85, "learning_rate": 4.52991452991453e-05, "loss": 0.6304, "step": 2184, "task_loss": 1.1567599773406982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6667308807373047, "epoch": 1.85, "learning_rate": 4.529444914060299e-05, "loss": 0.5838, "step": 2185, "task_loss": 0.5439514517784119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5374598503112793, "epoch": 1.85, "learning_rate": 4.528975298206068e-05, "loss": 0.6373, "step": 2186, "task_loss": 1.7690643072128296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6187200546264648, "epoch": 1.85, "learning_rate": 4.528505682351836e-05, "loss": 0.4218, "step": 2187, "task_loss": 0.4923951327800751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6206231117248535, "epoch": 1.85, "learning_rate": 4.5280360664976054e-05, "loss": 0.5245, "step": 2188, "task_loss": 0.5889687538146973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39784836769104004, "epoch": 1.85, "learning_rate": 4.527566450643374e-05, "loss": 0.3538, "step": 2189, "task_loss": 0.46454864740371704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4487486481666565, "epoch": 1.85, "learning_rate": 4.527096834789143e-05, "loss": 0.4732, "step": 2190, "task_loss": 0.734583854675293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5258761644363403, "epoch": 1.85, "learning_rate": 4.5266272189349114e-05, "loss": 0.5239, "step": 2191, "task_loss": 0.6086968183517456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3979544937610626, "epoch": 1.85, "learning_rate": 4.52615760308068e-05, "loss": 0.4262, "step": 2192, "task_loss": 0.4238906800746918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34142470359802246, "epoch": 1.85, "learning_rate": 4.525687987226449e-05, "loss": 0.4683, "step": 2193, "task_loss": 0.525591254234314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4198521375656128, "epoch": 1.85, "learning_rate": 4.525218371372218e-05, "loss": 0.4132, "step": 2194, "task_loss": 0.4189967215061188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6938943266868591, "epoch": 1.85, "learning_rate": 4.5247487555179866e-05, "loss": 0.4834, "step": 2195, "task_loss": 0.41714605689048767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.377368688583374, "epoch": 1.86, "learning_rate": 4.524279139663755e-05, "loss": 0.5465, "step": 2196, "task_loss": 0.25155481696128845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3696133494377136, "epoch": 1.86, "learning_rate": 4.523809523809524e-05, "loss": 0.447, "step": 2197, "task_loss": 0.601987361907959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19098202884197235, "epoch": 1.86, "learning_rate": 4.523339907955293e-05, "loss": 0.4491, "step": 2198, "task_loss": 0.3907912075519562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4875333309173584, "epoch": 1.86, "learning_rate": 4.522870292101061e-05, "loss": 0.5661, "step": 2199, "task_loss": 0.6661162376403809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32736656069755554, "epoch": 1.86, "learning_rate": 4.5224006762468304e-05, "loss": 0.5271, "step": 2200, "task_loss": 0.46076393127441406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6540840268135071, "epoch": 1.86, "learning_rate": 4.521931060392599e-05, "loss": 0.4811, "step": 2201, "task_loss": 0.5711532831192017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29404348134994507, "epoch": 1.86, "learning_rate": 4.521461444538368e-05, "loss": 0.4116, "step": 2202, "task_loss": 0.32152193784713745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3452158272266388, "epoch": 1.86, "learning_rate": 4.520991828684137e-05, "loss": 0.3755, "step": 2203, "task_loss": 0.5075838565826416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3322318196296692, "epoch": 1.86, "learning_rate": 4.520522212829905e-05, "loss": 0.4051, "step": 2204, "task_loss": 0.6758256554603577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6138968467712402, "epoch": 1.86, "learning_rate": 4.520052596975674e-05, "loss": 0.4511, "step": 2205, "task_loss": 0.6952475905418396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5258011817932129, "epoch": 1.86, "learning_rate": 4.519582981121443e-05, "loss": 0.4848, "step": 2206, "task_loss": 0.3543211817741394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4825882017612457, "epoch": 1.87, "learning_rate": 4.5191133652672115e-05, "loss": 0.5943, "step": 2207, "task_loss": 1.036073923110962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.424541711807251, "epoch": 1.87, "learning_rate": 4.51864374941298e-05, "loss": 0.4681, "step": 2208, "task_loss": 0.5150195956230164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5216034650802612, "epoch": 1.87, "learning_rate": 4.518174133558749e-05, "loss": 0.4508, "step": 2209, "task_loss": 0.7688934803009033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47978830337524414, "epoch": 1.87, "learning_rate": 4.517704517704518e-05, "loss": 0.4024, "step": 2210, "task_loss": 0.8857430815696716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3829491138458252, "epoch": 1.87, "learning_rate": 4.517234901850287e-05, "loss": 0.3935, "step": 2211, "task_loss": 0.6461055278778076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5216912627220154, "epoch": 1.87, "learning_rate": 4.5167652859960554e-05, "loss": 0.5234, "step": 2212, "task_loss": 1.2143796682357788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6324557065963745, "epoch": 1.87, "learning_rate": 4.516295670141824e-05, "loss": 0.5117, "step": 2213, "task_loss": 0.7220707535743713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5964484810829163, "epoch": 1.87, "learning_rate": 4.5158260542875926e-05, "loss": 0.595, "step": 2214, "task_loss": 0.825914204120636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8328298926353455, "epoch": 1.87, "learning_rate": 4.515356438433362e-05, "loss": 0.5054, "step": 2215, "task_loss": 0.6478527784347534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8233658671379089, "epoch": 1.87, "learning_rate": 4.5148868225791306e-05, "loss": 0.5238, "step": 2216, "task_loss": 0.6225942969322205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47511136531829834, "epoch": 1.87, "learning_rate": 4.514417206724899e-05, "loss": 0.4881, "step": 2217, "task_loss": 0.8083503246307373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4889293909072876, "epoch": 1.87, "learning_rate": 4.513947590870668e-05, "loss": 0.5049, "step": 2218, "task_loss": 1.7988728284835815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30787986516952515, "epoch": 1.88, "learning_rate": 4.513477975016437e-05, "loss": 0.3902, "step": 2219, "task_loss": 0.41925522685050964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21007980406284332, "epoch": 1.88, "learning_rate": 4.513008359162206e-05, "loss": 0.5485, "step": 2220, "task_loss": 0.295864462852478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3120136857032776, "epoch": 1.88, "learning_rate": 4.5125387433079744e-05, "loss": 0.5496, "step": 2221, "task_loss": 0.36127927899360657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5249415040016174, "epoch": 1.88, "learning_rate": 4.512069127453743e-05, "loss": 0.4676, "step": 2222, "task_loss": 0.5789380669593811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6903780102729797, "epoch": 1.88, "learning_rate": 4.511599511599512e-05, "loss": 0.5373, "step": 2223, "task_loss": 1.1258944272994995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7008808851242065, "epoch": 1.88, "learning_rate": 4.511129895745281e-05, "loss": 0.5441, "step": 2224, "task_loss": 0.97614985704422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3279248774051666, "epoch": 1.88, "learning_rate": 4.510660279891049e-05, "loss": 0.5029, "step": 2225, "task_loss": 0.9407379627227783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26409387588500977, "epoch": 1.88, "learning_rate": 4.510190664036818e-05, "loss": 0.4341, "step": 2226, "task_loss": 0.1965026706457138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4086940884590149, "epoch": 1.88, "learning_rate": 4.509721048182587e-05, "loss": 0.5744, "step": 2227, "task_loss": 0.42394861578941345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5228321552276611, "epoch": 1.88, "learning_rate": 4.5092514323283556e-05, "loss": 0.5587, "step": 2228, "task_loss": 0.8160446882247925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9729080200195312, "epoch": 1.88, "learning_rate": 4.508781816474124e-05, "loss": 0.6297, "step": 2229, "task_loss": 0.6197282671928406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5715774297714233, "epoch": 1.88, "learning_rate": 4.508312200619893e-05, "loss": 0.7383, "step": 2230, "task_loss": 1.2207638025283813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41616880893707275, "epoch": 1.89, "learning_rate": 4.507842584765662e-05, "loss": 0.6256, "step": 2231, "task_loss": 0.9111135601997375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3776910901069641, "epoch": 1.89, "learning_rate": 4.507372968911431e-05, "loss": 0.4949, "step": 2232, "task_loss": 0.3976961076259613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5620392560958862, "epoch": 1.89, "learning_rate": 4.5069033530571994e-05, "loss": 0.5899, "step": 2233, "task_loss": 0.6888552904129028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6922993659973145, "epoch": 1.89, "learning_rate": 4.506433737202968e-05, "loss": 0.5987, "step": 2234, "task_loss": 0.18395188450813293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5669347047805786, "epoch": 1.89, "learning_rate": 4.505964121348737e-05, "loss": 0.5293, "step": 2235, "task_loss": 0.9097599983215332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44983458518981934, "epoch": 1.89, "learning_rate": 4.505494505494506e-05, "loss": 0.3338, "step": 2236, "task_loss": 0.3221149742603302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7618392705917358, "epoch": 1.89, "learning_rate": 4.5050248896402746e-05, "loss": 0.5953, "step": 2237, "task_loss": 0.8235552310943604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36068716645240784, "epoch": 1.89, "learning_rate": 4.504555273786043e-05, "loss": 0.4359, "step": 2238, "task_loss": 0.8553930521011353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5971731543540955, "epoch": 1.89, "learning_rate": 4.504085657931812e-05, "loss": 0.52, "step": 2239, "task_loss": 0.6797471642494202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46504899859428406, "epoch": 1.89, "learning_rate": 4.5036160420775805e-05, "loss": 0.4852, "step": 2240, "task_loss": 0.6443586349487305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5404980182647705, "epoch": 1.89, "learning_rate": 4.50314642622335e-05, "loss": 0.5773, "step": 2241, "task_loss": 0.5632404088973999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42060065269470215, "epoch": 1.89, "learning_rate": 4.502676810369118e-05, "loss": 0.6403, "step": 2242, "task_loss": 1.4048517942428589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.596637487411499, "epoch": 1.9, "learning_rate": 4.502207194514887e-05, "loss": 0.4748, "step": 2243, "task_loss": 0.9007369875907898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48431819677352905, "epoch": 1.9, "learning_rate": 4.501737578660656e-05, "loss": 0.502, "step": 2244, "task_loss": 0.6001676321029663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3950881361961365, "epoch": 1.9, "learning_rate": 4.501267962806425e-05, "loss": 0.605, "step": 2245, "task_loss": 0.5265710353851318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6013555526733398, "epoch": 1.9, "learning_rate": 4.500798346952193e-05, "loss": 0.5276, "step": 2246, "task_loss": 0.7407698035240173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6055643558502197, "epoch": 1.9, "learning_rate": 4.5003287310979616e-05, "loss": 0.5804, "step": 2247, "task_loss": 0.6806430816650391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5544734001159668, "epoch": 1.9, "learning_rate": 4.499859115243731e-05, "loss": 0.5767, "step": 2248, "task_loss": 1.0623644590377808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6672912836074829, "epoch": 1.9, "learning_rate": 4.4993894993894996e-05, "loss": 0.5892, "step": 2249, "task_loss": 0.8079981207847595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8296298980712891, "epoch": 1.9, "learning_rate": 4.498919883535269e-05, "loss": 0.5586, "step": 2250, "task_loss": 1.6060644388198853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46345436573028564, "epoch": 1.9, "learning_rate": 4.498450267681037e-05, "loss": 0.4935, "step": 2251, "task_loss": 0.9264352917671204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.241556778550148, "epoch": 1.9, "learning_rate": 4.497980651826806e-05, "loss": 0.5152, "step": 2252, "task_loss": 0.3217388391494751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4721856713294983, "epoch": 1.9, "learning_rate": 4.497511035972575e-05, "loss": 0.4538, "step": 2253, "task_loss": 0.5739006400108337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47141534090042114, "epoch": 1.9, "learning_rate": 4.4970414201183434e-05, "loss": 0.3856, "step": 2254, "task_loss": 0.21546004712581635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45501384139060974, "epoch": 1.91, "learning_rate": 4.496571804264112e-05, "loss": 0.3706, "step": 2255, "task_loss": 1.1028428077697754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41102826595306396, "epoch": 1.91, "learning_rate": 4.496102188409881e-05, "loss": 0.5057, "step": 2256, "task_loss": 0.4980851113796234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4381043314933777, "epoch": 1.91, "learning_rate": 4.49563257255565e-05, "loss": 0.5877, "step": 2257, "task_loss": 0.10185384750366211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2724301815032959, "epoch": 1.91, "learning_rate": 4.4951629567014186e-05, "loss": 0.3232, "step": 2258, "task_loss": 0.030740652233362198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34063920378685, "epoch": 1.91, "learning_rate": 4.494693340847187e-05, "loss": 0.5142, "step": 2259, "task_loss": 0.7825870513916016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3411761522293091, "epoch": 1.91, "learning_rate": 4.494223724992956e-05, "loss": 0.4078, "step": 2260, "task_loss": 1.1561074256896973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7262354493141174, "epoch": 1.91, "learning_rate": 4.4937541091387245e-05, "loss": 0.6166, "step": 2261, "task_loss": 1.1032707691192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4700183570384979, "epoch": 1.91, "learning_rate": 4.493284493284494e-05, "loss": 0.4338, "step": 2262, "task_loss": 0.7526018619537354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4146284759044647, "epoch": 1.91, "learning_rate": 4.4928148774302625e-05, "loss": 0.3813, "step": 2263, "task_loss": 0.16266366839408875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7851124405860901, "epoch": 1.91, "learning_rate": 4.492345261576031e-05, "loss": 0.6004, "step": 2264, "task_loss": 0.694782018661499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4905773401260376, "epoch": 1.91, "learning_rate": 4.4918756457218e-05, "loss": 0.4796, "step": 2265, "task_loss": 0.45630016922950745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7270320653915405, "epoch": 1.91, "learning_rate": 4.4914060298675684e-05, "loss": 0.6032, "step": 2266, "task_loss": 1.6697497367858887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4356636106967926, "epoch": 1.92, "learning_rate": 4.490936414013338e-05, "loss": 0.5023, "step": 2267, "task_loss": 0.28711169958114624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41470834612846375, "epoch": 1.92, "learning_rate": 4.4904667981591057e-05, "loss": 0.4409, "step": 2268, "task_loss": 0.6080171465873718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4068295955657959, "epoch": 1.92, "learning_rate": 4.489997182304875e-05, "loss": 0.5133, "step": 2269, "task_loss": 0.5458813309669495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4273977279663086, "epoch": 1.92, "learning_rate": 4.4895275664506436e-05, "loss": 0.4516, "step": 2270, "task_loss": 0.9136203527450562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37279364466667175, "epoch": 1.92, "learning_rate": 4.489057950596412e-05, "loss": 0.4521, "step": 2271, "task_loss": 0.4039550721645355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43062442541122437, "epoch": 1.92, "learning_rate": 4.488588334742181e-05, "loss": 0.417, "step": 2272, "task_loss": 0.41892319917678833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27266040444374084, "epoch": 1.92, "learning_rate": 4.4881187188879495e-05, "loss": 0.513, "step": 2273, "task_loss": 0.06651925295591354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.435702383518219, "epoch": 1.92, "learning_rate": 4.487649103033719e-05, "loss": 0.4564, "step": 2274, "task_loss": 0.48960229754447937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41149261593818665, "epoch": 1.92, "learning_rate": 4.4871794871794874e-05, "loss": 0.5402, "step": 2275, "task_loss": 0.4844750165939331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3357287049293518, "epoch": 1.92, "learning_rate": 4.486709871325256e-05, "loss": 0.566, "step": 2276, "task_loss": 0.37340837717056274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5244724750518799, "epoch": 1.92, "learning_rate": 4.486240255471025e-05, "loss": 0.5831, "step": 2277, "task_loss": 0.22633199393749237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5554174184799194, "epoch": 1.93, "learning_rate": 4.4857706396167933e-05, "loss": 0.5445, "step": 2278, "task_loss": 0.8629150390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.462231308221817, "epoch": 1.93, "learning_rate": 4.4853010237625627e-05, "loss": 0.5361, "step": 2279, "task_loss": 0.8522494435310364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.638670027256012, "epoch": 1.93, "learning_rate": 4.484831407908331e-05, "loss": 0.4396, "step": 2280, "task_loss": 0.8751429915428162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4238443970680237, "epoch": 1.93, "learning_rate": 4.4843617920541e-05, "loss": 0.4303, "step": 2281, "task_loss": 0.8385806083679199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4631095230579376, "epoch": 1.93, "learning_rate": 4.4838921761998686e-05, "loss": 0.5288, "step": 2282, "task_loss": 0.8510555624961853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8185307383537292, "epoch": 1.93, "learning_rate": 4.483422560345638e-05, "loss": 0.5788, "step": 2283, "task_loss": 0.8415786623954773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33678174018859863, "epoch": 1.93, "learning_rate": 4.4829529444914065e-05, "loss": 0.4372, "step": 2284, "task_loss": 0.4811292886734009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.469954252243042, "epoch": 1.93, "learning_rate": 4.4824833286371745e-05, "loss": 0.6135, "step": 2285, "task_loss": 0.7966688871383667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.458932101726532, "epoch": 1.93, "learning_rate": 4.482013712782944e-05, "loss": 0.4888, "step": 2286, "task_loss": 1.1896733045578003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7756118774414062, "epoch": 1.93, "learning_rate": 4.4815440969287124e-05, "loss": 0.6377, "step": 2287, "task_loss": 1.0284441709518433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5039101243019104, "epoch": 1.93, "learning_rate": 4.481074481074482e-05, "loss": 0.5732, "step": 2288, "task_loss": 0.7945369482040405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5078351497650146, "epoch": 1.93, "learning_rate": 4.48060486522025e-05, "loss": 0.4439, "step": 2289, "task_loss": 0.829267680644989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3352982997894287, "epoch": 1.94, "learning_rate": 4.480135249366019e-05, "loss": 0.5587, "step": 2290, "task_loss": 0.4744657874107361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5995486974716187, "epoch": 1.94, "learning_rate": 4.4796656335117876e-05, "loss": 0.4555, "step": 2291, "task_loss": 1.7109476327896118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5531656742095947, "epoch": 1.94, "learning_rate": 4.479196017657556e-05, "loss": 0.5073, "step": 2292, "task_loss": 1.1096162796020508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43833714723587036, "epoch": 1.94, "learning_rate": 4.4787264018033256e-05, "loss": 0.4772, "step": 2293, "task_loss": 0.5662580728530884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37327587604522705, "epoch": 1.94, "learning_rate": 4.4782567859490935e-05, "loss": 0.6048, "step": 2294, "task_loss": 0.22489942610263824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.590607762336731, "epoch": 1.94, "learning_rate": 4.477787170094863e-05, "loss": 0.4298, "step": 2295, "task_loss": 0.6155186295509338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8453481197357178, "epoch": 1.94, "learning_rate": 4.4773175542406315e-05, "loss": 0.5413, "step": 2296, "task_loss": 1.1669355630874634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3421834409236908, "epoch": 1.94, "learning_rate": 4.4768479383864e-05, "loss": 0.4604, "step": 2297, "task_loss": 0.789575457572937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4658336639404297, "epoch": 1.94, "learning_rate": 4.476378322532169e-05, "loss": 0.5038, "step": 2298, "task_loss": 0.4697781205177307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3698551058769226, "epoch": 1.94, "learning_rate": 4.4759087066779374e-05, "loss": 0.4466, "step": 2299, "task_loss": 0.8331364989280701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.415000319480896, "epoch": 1.94, "learning_rate": 4.475439090823707e-05, "loss": 0.463, "step": 2300, "task_loss": 1.6656509637832642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.186642587184906, "epoch": 1.94, "learning_rate": 4.474969474969475e-05, "loss": 0.4216, "step": 2301, "task_loss": 0.5912747383117676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4632139801979065, "epoch": 1.95, "learning_rate": 4.474499859115244e-05, "loss": 0.5836, "step": 2302, "task_loss": 0.5070899724960327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36679109930992126, "epoch": 1.95, "learning_rate": 4.4740302432610126e-05, "loss": 0.4828, "step": 2303, "task_loss": 1.5088553428649902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5619080066680908, "epoch": 1.95, "learning_rate": 4.473560627406781e-05, "loss": 0.4727, "step": 2304, "task_loss": 0.6848036050796509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6623693704605103, "epoch": 1.95, "learning_rate": 4.4730910115525505e-05, "loss": 0.5528, "step": 2305, "task_loss": 0.9009165167808533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5859673023223877, "epoch": 1.95, "learning_rate": 4.4726213956983185e-05, "loss": 0.484, "step": 2306, "task_loss": 1.1272625923156738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4836239516735077, "epoch": 1.95, "learning_rate": 4.472151779844088e-05, "loss": 0.4051, "step": 2307, "task_loss": 0.4508412778377533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5388451814651489, "epoch": 1.95, "learning_rate": 4.4716821639898564e-05, "loss": 0.4917, "step": 2308, "task_loss": 0.4663715064525604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4498280882835388, "epoch": 1.95, "learning_rate": 4.471212548135625e-05, "loss": 0.6049, "step": 2309, "task_loss": 0.47586867213249207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5159509181976318, "epoch": 1.95, "learning_rate": 4.4707429322813944e-05, "loss": 0.5883, "step": 2310, "task_loss": 0.6426600217819214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4842700660228729, "epoch": 1.95, "learning_rate": 4.470273316427162e-05, "loss": 0.4394, "step": 2311, "task_loss": 0.5726231336593628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7939164638519287, "epoch": 1.95, "learning_rate": 4.4698037005729316e-05, "loss": 0.5487, "step": 2312, "task_loss": 1.0662871599197388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.475546658039093, "epoch": 1.95, "learning_rate": 4.4693340847187e-05, "loss": 0.5582, "step": 2313, "task_loss": 1.1972315311431885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4680544137954712, "epoch": 1.96, "learning_rate": 4.4688644688644696e-05, "loss": 0.6114, "step": 2314, "task_loss": 1.2322615385055542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45897573232650757, "epoch": 1.96, "learning_rate": 4.4683948530102375e-05, "loss": 0.6508, "step": 2315, "task_loss": 0.9799852967262268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6835887432098389, "epoch": 1.96, "learning_rate": 4.467925237156007e-05, "loss": 0.4714, "step": 2316, "task_loss": 1.3032305240631104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6146513223648071, "epoch": 1.96, "learning_rate": 4.4674556213017755e-05, "loss": 0.5424, "step": 2317, "task_loss": 0.5334000587463379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4581543207168579, "epoch": 1.96, "learning_rate": 4.466986005447544e-05, "loss": 0.3654, "step": 2318, "task_loss": 0.4548841416835785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4519529342651367, "epoch": 1.96, "learning_rate": 4.466516389593313e-05, "loss": 0.344, "step": 2319, "task_loss": 0.2913516163825989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.300606906414032, "epoch": 1.96, "learning_rate": 4.4660467737390814e-05, "loss": 0.4393, "step": 2320, "task_loss": 0.4161163866519928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2624441385269165, "epoch": 1.96, "learning_rate": 4.465577157884851e-05, "loss": 0.4708, "step": 2321, "task_loss": 0.2939540147781372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5844525098800659, "epoch": 1.96, "learning_rate": 4.4651075420306193e-05, "loss": 0.4915, "step": 2322, "task_loss": 0.42128822207450867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49903208017349243, "epoch": 1.96, "learning_rate": 4.464637926176388e-05, "loss": 0.5129, "step": 2323, "task_loss": 0.5138958692550659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5361875295639038, "epoch": 1.96, "learning_rate": 4.4641683103221566e-05, "loss": 0.55, "step": 2324, "task_loss": 0.4738599956035614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45664116740226746, "epoch": 1.96, "learning_rate": 4.463698694467925e-05, "loss": 0.5525, "step": 2325, "task_loss": 0.8171424269676208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3576260507106781, "epoch": 1.97, "learning_rate": 4.4632290786136946e-05, "loss": 0.5672, "step": 2326, "task_loss": 1.0750395059585571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3969932794570923, "epoch": 1.97, "learning_rate": 4.462759462759463e-05, "loss": 0.4086, "step": 2327, "task_loss": 0.2767643332481384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.461676687002182, "epoch": 1.97, "learning_rate": 4.462289846905232e-05, "loss": 0.4803, "step": 2328, "task_loss": 0.4770897626876831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18094457685947418, "epoch": 1.97, "learning_rate": 4.4618202310510005e-05, "loss": 0.4833, "step": 2329, "task_loss": 0.36839014291763306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45526620745658875, "epoch": 1.97, "learning_rate": 4.461350615196769e-05, "loss": 0.4242, "step": 2330, "task_loss": 0.7690585255622864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5975671410560608, "epoch": 1.97, "learning_rate": 4.4608809993425384e-05, "loss": 0.5545, "step": 2331, "task_loss": 0.38365310430526733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45309197902679443, "epoch": 1.97, "learning_rate": 4.4604113834883064e-05, "loss": 0.4787, "step": 2332, "task_loss": 0.25185126066207886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2956143021583557, "epoch": 1.97, "learning_rate": 4.459941767634076e-05, "loss": 0.4137, "step": 2333, "task_loss": 0.6199808120727539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5094056129455566, "epoch": 1.97, "learning_rate": 4.459472151779844e-05, "loss": 0.5664, "step": 2334, "task_loss": 1.099775791168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.679334282875061, "epoch": 1.97, "learning_rate": 4.459002535925613e-05, "loss": 0.5532, "step": 2335, "task_loss": 1.1441566944122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6261852979660034, "epoch": 1.97, "learning_rate": 4.4585329200713816e-05, "loss": 0.6711, "step": 2336, "task_loss": 1.765809416770935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.374759316444397, "epoch": 1.97, "learning_rate": 4.45806330421715e-05, "loss": 0.4704, "step": 2337, "task_loss": 1.2894095182418823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6269339323043823, "epoch": 1.98, "learning_rate": 4.4575936883629195e-05, "loss": 0.4444, "step": 2338, "task_loss": 0.9700226187705994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6016024947166443, "epoch": 1.98, "learning_rate": 4.457124072508688e-05, "loss": 0.4864, "step": 2339, "task_loss": 0.2185150533914566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.630359411239624, "epoch": 1.98, "learning_rate": 4.4566544566544575e-05, "loss": 0.4559, "step": 2340, "task_loss": 0.15876653790473938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47041070461273193, "epoch": 1.98, "learning_rate": 4.4561848408002254e-05, "loss": 0.4972, "step": 2341, "task_loss": 0.5566450357437134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2233254313468933, "epoch": 1.98, "learning_rate": 4.455715224945994e-05, "loss": 0.3652, "step": 2342, "task_loss": 0.5470284223556519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3537633419036865, "epoch": 1.98, "learning_rate": 4.4552456090917634e-05, "loss": 0.3383, "step": 2343, "task_loss": 0.7110322117805481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4108140170574188, "epoch": 1.98, "learning_rate": 4.454775993237532e-05, "loss": 0.4349, "step": 2344, "task_loss": 0.8749346137046814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6935168504714966, "epoch": 1.98, "learning_rate": 4.4543063773833006e-05, "loss": 0.6031, "step": 2345, "task_loss": 1.6098690032958984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23929846286773682, "epoch": 1.98, "learning_rate": 4.453836761529069e-05, "loss": 0.4596, "step": 2346, "task_loss": 0.3002939820289612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28414395451545715, "epoch": 1.98, "learning_rate": 4.4533671456748386e-05, "loss": 0.4375, "step": 2347, "task_loss": 0.3951990008354187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6114182472229004, "epoch": 1.98, "learning_rate": 4.452897529820607e-05, "loss": 0.6948, "step": 2348, "task_loss": 1.3245290517807007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2539694607257843, "epoch": 1.99, "learning_rate": 4.452427913966375e-05, "loss": 0.5274, "step": 2349, "task_loss": 0.08988608419895172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43799450993537903, "epoch": 1.99, "learning_rate": 4.4519582981121445e-05, "loss": 0.4612, "step": 2350, "task_loss": 0.3711228668689728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37054580450057983, "epoch": 1.99, "learning_rate": 4.451488682257913e-05, "loss": 0.5219, "step": 2351, "task_loss": 0.32591941952705383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7378705739974976, "epoch": 1.99, "learning_rate": 4.4510190664036824e-05, "loss": 0.4009, "step": 2352, "task_loss": 0.2216765582561493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.632576048374176, "epoch": 1.99, "learning_rate": 4.4505494505494504e-05, "loss": 0.6144, "step": 2353, "task_loss": 1.029146432876587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3908984065055847, "epoch": 1.99, "learning_rate": 4.45007983469522e-05, "loss": 0.4335, "step": 2354, "task_loss": 0.15163390338420868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5609326958656311, "epoch": 1.99, "learning_rate": 4.449610218840988e-05, "loss": 0.4447, "step": 2355, "task_loss": 0.6002312302589417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4824296236038208, "epoch": 1.99, "learning_rate": 4.449140602986757e-05, "loss": 0.5443, "step": 2356, "task_loss": 0.8916800022125244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2878044843673706, "epoch": 1.99, "learning_rate": 4.448670987132526e-05, "loss": 0.3716, "step": 2357, "task_loss": 0.20491690933704376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4089168906211853, "epoch": 1.99, "learning_rate": 4.448201371278294e-05, "loss": 0.5752, "step": 2358, "task_loss": 0.19049356877803802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5704123377799988, "epoch": 1.99, "learning_rate": 4.4477317554240635e-05, "loss": 0.5579, "step": 2359, "task_loss": 0.941679835319519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1822560429573059, "epoch": 1.99, "learning_rate": 4.447262139569832e-05, "loss": 0.3378, "step": 2360, "task_loss": 0.5127880573272705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2383851557970047, "epoch": 2.0, "learning_rate": 4.446792523715601e-05, "loss": 0.4302, "step": 2361, "task_loss": 0.5232435464859009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5691317319869995, "epoch": 2.0, "learning_rate": 4.4463229078613694e-05, "loss": 0.572, "step": 2362, "task_loss": 0.2830357253551483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42567265033721924, "epoch": 2.0, "learning_rate": 4.445853292007138e-05, "loss": 0.5011, "step": 2363, "task_loss": 1.3851203918457031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31918177008628845, "epoch": 2.0, "learning_rate": 4.4453836761529074e-05, "loss": 0.5798, "step": 2364, "task_loss": 1.4629466533660889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6944785714149475, "epoch": 2.0, "learning_rate": 4.444914060298676e-05, "loss": 0.506, "step": 2365, "task_loss": 0.9923344850540161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5985206365585327, "epoch": 2.0, "learning_rate": 4.4444444444444447e-05, "loss": 0.5156, "step": 2366, "task_loss": 0.8847988843917847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -0.007113098166882992, "compression/movement_sparsity/linear_layer_sparsity": 0.0010076994827391472, "compression/movement_sparsity/model_sparsity": 0.000973081905460848, "compression_loss": 0.0, "distillation_loss": 0.5030602812767029, "epoch": 2.0, "learning_rate": 4.443974828590213e-05, "loss": 0.972, "step": 2367, "task_loss": 1.5439765453338623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0008443568368990961, "compression/movement_sparsity/importance_threshold": -0.00710709217381425, "compression/movement_sparsity/linear_layer_sparsity": 0.0010260150042278328, "compression/movement_sparsity/model_sparsity": 0.0009907682324412628, "compression_loss": 0.0912206843495369, "distillation_loss": 0.7535605430603027, "epoch": 2.0, "learning_rate": 4.443505212735982e-05, "loss": 0.7224, "step": 2368, "task_loss": 0.9886881113052368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0016882382476638425, "compression/movement_sparsity/importance_threshold": -0.0071010895624982725, "compression/movement_sparsity/linear_layer_sparsity": 0.0010290675911426139, "compression/movement_sparsity/model_sparsity": 0.0009937159536046651, "compression_loss": 0.182390034198761, "distillation_loss": 0.3908480107784271, "epoch": 2.0, "learning_rate": 4.443035596881751e-05, "loss": 0.6329, "step": 2369, "task_loss": 0.5341839790344238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.002531644366179364, "compression/movement_sparsity/importance_threshold": -0.007095090331982722, "compression/movement_sparsity/linear_layer_sparsity": 0.0010290675911426139, "compression/movement_sparsity/model_sparsity": 0.0009937159536046651, "compression_loss": 0.27350762486457825, "distillation_loss": 0.579408586025238, "epoch": 2.0, "learning_rate": 4.44256598102752e-05, "loss": 0.8253, "step": 2370, "task_loss": 0.7175576686859131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0033745753263308975, "compression/movement_sparsity/importance_threshold": -0.007089094481315259, "compression/movement_sparsity/linear_layer_sparsity": 0.0010473831126312993, "compression/movement_sparsity/model_sparsity": 0.00101140228058508, "compression_loss": 0.36457380652427673, "distillation_loss": 0.3494855761528015, "epoch": 2.0, "learning_rate": 4.4420963651732885e-05, "loss": 0.7495, "step": 2371, "task_loss": 0.6862955689430237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.004217031262003457, "compression/movement_sparsity/importance_threshold": -0.0070831020095435465, "compression/movement_sparsity/linear_layer_sparsity": 0.0010229743414806878, "compression/movement_sparsity/model_sparsity": 0.0009878320258136547, "compression_loss": 0.4555884599685669, "distillation_loss": 0.4343947768211365, "epoch": 2.01, "learning_rate": 4.441626749319057e-05, "loss": 1.0708, "step": 2372, "task_loss": 1.1966631412506104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.005059012307081834, "compression/movement_sparsity/importance_threshold": -0.007077112915715249, "compression/movement_sparsity/linear_layer_sparsity": 0.001059605384458059, "compression/movement_sparsity/model_sparsity": 0.001023204679774484, "compression_loss": 0.5465518832206726, "distillation_loss": 0.5528329610824585, "epoch": 2.01, "learning_rate": 4.441157133464826e-05, "loss": 1.0429, "step": 2373, "task_loss": 0.7918266654014587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.005900518595452042, "compression/movement_sparsity/importance_threshold": -0.0070711271988780225, "compression/movement_sparsity/linear_layer_sparsity": 0.001059605384458059, "compression/movement_sparsity/model_sparsity": 0.001023204679774484, "compression_loss": 0.6374635100364685, "distillation_loss": 0.27731049060821533, "epoch": 2.01, "learning_rate": 4.440687517610595e-05, "loss": 1.1238, "step": 2374, "task_loss": 0.10646284371614456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.006741550260998652, "compression/movement_sparsity/importance_threshold": -0.0070651448580795325, "compression/movement_sparsity/linear_layer_sparsity": 0.0010473950367989352, "compression/movement_sparsity/model_sparsity": 0.0010114137951208744, "compression_loss": 0.7283238172531128, "distillation_loss": 0.6554473638534546, "epoch": 2.01, "learning_rate": 4.440217901756363e-05, "loss": 1.2863, "step": 2375, "task_loss": 1.0375757217407227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007582107437607011, "compression/movement_sparsity/importance_threshold": -0.0070591658923674395, "compression/movement_sparsity/linear_layer_sparsity": 0.001059605384458059, "compression/movement_sparsity/model_sparsity": 0.001023204679774484, "compression_loss": 0.8191325068473816, "distillation_loss": 0.40347737073898315, "epoch": 2.01, "learning_rate": 4.4397482859021324e-05, "loss": 1.2207, "step": 2376, "task_loss": 0.5409803986549377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.008422190259162132, "compression/movement_sparsity/importance_threshold": -0.0070531903007894055, "compression/movement_sparsity/linear_layer_sparsity": 0.0010474069609665711, "compression/movement_sparsity/model_sparsity": 0.0010114253096566688, "compression_loss": 0.9098894596099854, "distillation_loss": 0.4309406578540802, "epoch": 2.01, "learning_rate": 4.439278670047901e-05, "loss": 1.4478, "step": 2377, "task_loss": 1.1037018299102783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.009261798859549142, "compression/movement_sparsity/importance_threshold": -0.007047218082393094, "compression/movement_sparsity/linear_layer_sparsity": 0.0010474069609665711, "compression/movement_sparsity/model_sparsity": 0.0010114253096566688, "compression_loss": 1.000595211982727, "distillation_loss": 0.6870647668838501, "epoch": 2.01, "learning_rate": 4.43880905419367e-05, "loss": 1.7083, "step": 2378, "task_loss": 0.15179899334907532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010100933372653165, "compression/movement_sparsity/importance_threshold": -0.007041249236226166, "compression/movement_sparsity/linear_layer_sparsity": 0.0010367229067648378, "compression/movement_sparsity/model_sparsity": 0.0010011082855847604, "compression_loss": 1.0912504196166992, "distillation_loss": 0.46529969573020935, "epoch": 2.01, "learning_rate": 4.438339438339438e-05, "loss": 1.6751, "step": 2379, "task_loss": 0.29557526111602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010939593932359548, "compression/movement_sparsity/importance_threshold": -0.007035283761336281, "compression/movement_sparsity/linear_layer_sparsity": 0.0010367229067648378, "compression/movement_sparsity/model_sparsity": 0.0010011082855847604, "compression_loss": 1.1818541288375854, "distillation_loss": 0.38021931052207947, "epoch": 2.01, "learning_rate": 4.437869822485207e-05, "loss": 1.5645, "step": 2380, "task_loss": 0.5506501197814941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.011777780672553195, "compression/movement_sparsity/importance_threshold": -0.0070293216567711035, "compression/movement_sparsity/linear_layer_sparsity": 0.001027565146020495, "compression/movement_sparsity/model_sparsity": 0.000992265122094553, "compression_loss": 1.2724051475524902, "distillation_loss": 0.5424460768699646, "epoch": 2.01, "learning_rate": 4.437400206630976e-05, "loss": 1.8178, "step": 2381, "task_loss": 0.9462644457817078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.012615493727119342, "compression/movement_sparsity/importance_threshold": -0.0070233629215782955, "compression/movement_sparsity/linear_layer_sparsity": 0.0010397754936796188, "compression/movement_sparsity/model_sparsity": 0.0010040560067481627, "compression_loss": 1.3629070520401, "distillation_loss": 0.30560755729675293, "epoch": 2.01, "learning_rate": 4.436930590776745e-05, "loss": 1.8587, "step": 2382, "task_loss": 0.8868820667266846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.013452733229942782, "compression/movement_sparsity/importance_threshold": -0.00701740755480552, "compression/movement_sparsity/linear_layer_sparsity": 0.0010428280805943996, "compression/movement_sparsity/model_sparsity": 0.0010070037279115652, "compression_loss": 1.453355312347412, "distillation_loss": 0.4208690822124481, "epoch": 2.01, "learning_rate": 4.4364609749225135e-05, "loss": 1.8683, "step": 2383, "task_loss": 0.30644044280052185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.014289499314909304, "compression/movement_sparsity/importance_threshold": -0.0070114555555004345, "compression/movement_sparsity/linear_layer_sparsity": 0.001067248775912647, "compression/movement_sparsity/model_sparsity": 0.0010305854972187847, "compression_loss": 1.543751835823059, "distillation_loss": 0.7275977730751038, "epoch": 2.02, "learning_rate": 4.435991359068282e-05, "loss": 2.0909, "step": 2384, "task_loss": 0.5624712109565735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015125792115903702, "compression/movement_sparsity/importance_threshold": -0.007005506922710704, "compression/movement_sparsity/linear_layer_sparsity": 0.001070313286995064, "compression/movement_sparsity/model_sparsity": 0.0010335447329179817, "compression_loss": 1.6340981721878052, "distillation_loss": 0.46195363998413086, "epoch": 2.02, "learning_rate": 4.4355217432140514e-05, "loss": 2.1354, "step": 2385, "task_loss": 0.5160710215568542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015961611766811212, "compression/movement_sparsity/importance_threshold": -0.006999561655483989, "compression/movement_sparsity/linear_layer_sparsity": 0.0010825236346541877, "compression/movement_sparsity/model_sparsity": 0.0010453356175715916, "compression_loss": 1.7243919372558594, "distillation_loss": 0.6593796014785767, "epoch": 2.02, "learning_rate": 4.43505212735982e-05, "loss": 2.3005, "step": 2386, "task_loss": 1.0696685314178467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01679695840151696, "compression/movement_sparsity/importance_threshold": -0.0069936197528679515, "compression/movement_sparsity/linear_layer_sparsity": 0.0010825236346541877, "compression/movement_sparsity/model_sparsity": 0.0010453356175715916, "compression_loss": 1.814634084701538, "distillation_loss": 0.4293580651283264, "epoch": 2.02, "learning_rate": 4.434582511505589e-05, "loss": 2.2533, "step": 2387, "task_loss": 0.5009238719940186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.017631832153905957, "compression/movement_sparsity/importance_threshold": -0.006987681213910255, "compression/movement_sparsity/linear_layer_sparsity": 0.0010825236346541877, "compression/movement_sparsity/model_sparsity": 0.0010453356175715916, "compression_loss": 1.9048253297805786, "distillation_loss": 0.31021663546562195, "epoch": 2.02, "learning_rate": 4.434112895651357e-05, "loss": 2.4024, "step": 2388, "task_loss": 0.4507502317428589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.018466233157863443, "compression/movement_sparsity/importance_threshold": -0.006981746037658559, "compression/movement_sparsity/linear_layer_sparsity": 0.0011252598514611208, "compression/movement_sparsity/model_sparsity": 0.0010866037138592256, "compression_loss": 1.994964838027954, "distillation_loss": 0.4041002094745636, "epoch": 2.02, "learning_rate": 4.433643279797126e-05, "loss": 2.5306, "step": 2389, "task_loss": 0.9392569661140442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01930016154727454, "compression/movement_sparsity/importance_threshold": -0.006975814223160528, "compression/movement_sparsity/linear_layer_sparsity": 0.0011374821232878803, "compression/movement_sparsity/model_sparsity": 0.00109840611304863, "compression_loss": 2.085052490234375, "distillation_loss": 0.5343973636627197, "epoch": 2.02, "learning_rate": 4.433173663942895e-05, "loss": 2.5692, "step": 2390, "task_loss": 0.6908472180366516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02013361745602449, "compression/movement_sparsity/importance_threshold": -0.00696988576946382, "compression/movement_sparsity/linear_layer_sparsity": 0.0011558214931118376, "compression/movement_sparsity/model_sparsity": 0.0011161154691006338, "compression_loss": 2.1750876903533936, "distillation_loss": 0.542097806930542, "epoch": 2.02, "learning_rate": 4.432704048088664e-05, "loss": 2.7277, "step": 2391, "task_loss": 0.2988603115081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.020966601017998188, "compression/movement_sparsity/importance_threshold": -0.006963960675616102, "compression/movement_sparsity/linear_layer_sparsity": 0.0011619266669413994, "compression/movement_sparsity/model_sparsity": 0.0011220109114274386, "compression_loss": 2.2650704383850098, "distillation_loss": 0.2648451328277588, "epoch": 2.02, "learning_rate": 4.4322344322344325e-05, "loss": 2.7844, "step": 2392, "task_loss": 0.5819256901741028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.021799112367080653, "compression/movement_sparsity/importance_threshold": -0.006958038940665034, "compression/movement_sparsity/linear_layer_sparsity": 0.0011588740800266186, "compression/movement_sparsity/model_sparsity": 0.0011190631902640363, "compression_loss": 2.354999303817749, "distillation_loss": 0.41611185669898987, "epoch": 2.02, "learning_rate": 4.431764816380201e-05, "loss": 2.8783, "step": 2393, "task_loss": 0.13560524582862854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022631151637157676, "compression/movement_sparsity/importance_threshold": -0.006952120563658275, "compression/movement_sparsity/linear_layer_sparsity": 0.001177189601515304, "compression/movement_sparsity/model_sparsity": 0.0011367495172444509, "compression_loss": 2.4448742866516113, "distillation_loss": 0.5730608701705933, "epoch": 2.02, "learning_rate": 4.43129520052597e-05, "loss": 2.9927, "step": 2394, "task_loss": 1.2174558639526367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.023462718962113938, "compression/movement_sparsity/importance_threshold": -0.006946205543643488, "compression/movement_sparsity/linear_layer_sparsity": 0.0011867408597916304, "compression/movement_sparsity/model_sparsity": 0.0011459726604158781, "compression_loss": 2.5346992015838623, "distillation_loss": 0.6160603761672974, "epoch": 2.02, "learning_rate": 4.430825584671739e-05, "loss": 3.0502, "step": 2395, "task_loss": 1.6081432104110718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.024293814475834785, "compression/movement_sparsity/importance_threshold": -0.006940293879668336, "compression/movement_sparsity/linear_layer_sparsity": 0.0012054260304770277, "compression/movement_sparsity/model_sparsity": 0.0011640159380059236, "compression_loss": 2.624471426010132, "distillation_loss": 0.6860619187355042, "epoch": 2.03, "learning_rate": 4.430355968817507e-05, "loss": 3.0842, "step": 2396, "task_loss": 0.9191383719444275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025124438312205122, "compression/movement_sparsity/importance_threshold": -0.0069343855707804805, "compression/movement_sparsity/linear_layer_sparsity": 0.0012428202201830941, "compression/movement_sparsity/model_sparsity": 0.0012001255222576035, "compression_loss": 2.7141919136047363, "distillation_loss": 0.7009986639022827, "epoch": 2.03, "learning_rate": 4.4298863529632764e-05, "loss": 3.2826, "step": 2397, "task_loss": 1.0564738512039185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025954590605110184, "compression/movement_sparsity/importance_threshold": -0.006928480616027584, "compression/movement_sparsity/linear_layer_sparsity": 0.001250833260834394, "compression/movement_sparsity/model_sparsity": 0.0012078632903115348, "compression_loss": 2.803861141204834, "distillation_loss": 0.4235285520553589, "epoch": 2.03, "learning_rate": 4.429416737109045e-05, "loss": 3.2134, "step": 2398, "task_loss": 1.3619475364685059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.026784271488435207, "compression/movement_sparsity/importance_threshold": -0.006922579014457307, "compression/movement_sparsity/linear_layer_sparsity": 0.001274872382788294, "compression/movement_sparsity/model_sparsity": 0.001231076594473329, "compression_loss": 2.8934760093688965, "distillation_loss": 0.7963517904281616, "epoch": 2.03, "learning_rate": 4.4289471212548136e-05, "loss": 3.3962, "step": 2399, "task_loss": 1.2320644855499268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.027613481096065207, "compression/movement_sparsity/importance_threshold": -0.006916680765117312, "compression/movement_sparsity/linear_layer_sparsity": 0.0012901353173621986, "compression/movement_sparsity/model_sparsity": 0.0012458152002903413, "compression_loss": 2.983038902282715, "distillation_loss": 0.4581056237220764, "epoch": 2.03, "learning_rate": 4.428477505400582e-05, "loss": 3.4606, "step": 2400, "task_loss": 1.7261179685592651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.028442219561885307, "compression/movement_sparsity/importance_threshold": -0.006910785867055262, "compression/movement_sparsity/linear_layer_sparsity": 0.001311515349933301, "compression/movement_sparsity/model_sparsity": 0.001266460762969953, "compression_loss": 3.0725505352020264, "distillation_loss": 0.47131088376045227, "epoch": 2.03, "learning_rate": 4.428007889546351e-05, "loss": 3.5793, "step": 2401, "task_loss": 0.6231713891029358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.029270487019780744, "compression/movement_sparsity/importance_threshold": -0.006904894319318817, "compression/movement_sparsity/linear_layer_sparsity": 0.001368178994538922, "compression/movement_sparsity/model_sparsity": 0.0013211778370656108, "compression_loss": 3.1620097160339355, "distillation_loss": 0.7861278057098389, "epoch": 2.03, "learning_rate": 4.42753827369212e-05, "loss": 3.6761, "step": 2402, "task_loss": 1.0316413640975952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03009828360363631, "compression/movement_sparsity/importance_threshold": -0.006899006120955642, "compression/movement_sparsity/linear_layer_sparsity": 0.0013780760536766883, "compression/movement_sparsity/model_sparsity": 0.0013307349017750796, "compression_loss": 3.251415729522705, "distillation_loss": 0.25536906719207764, "epoch": 2.03, "learning_rate": 4.427068657837889e-05, "loss": 3.6655, "step": 2403, "task_loss": 0.42422783374786377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03092560944733791, "compression/movement_sparsity/importance_threshold": -0.0068931212710133935, "compression/movement_sparsity/linear_layer_sparsity": 0.0014059309092740644, "compression/movement_sparsity/model_sparsity": 0.001357632857391127, "compression_loss": 3.3407721519470215, "distillation_loss": 0.4604566991329193, "epoch": 2.03, "learning_rate": 4.4265990419836575e-05, "loss": 3.851, "step": 2404, "task_loss": 0.8720980882644653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03175246468477011, "compression/movement_sparsity/importance_threshold": -0.006887239768539737, "compression/movement_sparsity/linear_layer_sparsity": 0.0014005888821731977, "compression/movement_sparsity/model_sparsity": 0.0013524743453551726, "compression_loss": 3.430074691772461, "distillation_loss": 0.49304842948913574, "epoch": 2.03, "learning_rate": 4.426129426129426e-05, "loss": 3.8379, "step": 2405, "task_loss": 0.3987417221069336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03257884944981815, "compression/movement_sparsity/importance_threshold": -0.006881361612582334, "compression/movement_sparsity/linear_layer_sparsity": 0.0014616406204688162, "compression/movement_sparsity/model_sparsity": 0.0014114287686232217, "compression_loss": 3.5193302631378174, "distillation_loss": 0.40988579392433167, "epoch": 2.03, "learning_rate": 4.425659810275195e-05, "loss": 3.8838, "step": 2406, "task_loss": 0.4417437016963959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03340476387636726, "compression/movement_sparsity/importance_threshold": -0.006875486802188845, "compression/movement_sparsity/linear_layer_sparsity": 0.0014994163835392303, "compression/movement_sparsity/model_sparsity": 0.0014479068180203267, "compression_loss": 3.6085281372070312, "distillation_loss": 0.2208811640739441, "epoch": 2.03, "learning_rate": 4.425190194420964e-05, "loss": 4.0404, "step": 2407, "task_loss": 0.7954769134521484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03423020809830246, "compression/movement_sparsity/importance_threshold": -0.0068696153364069335, "compression/movement_sparsity/linear_layer_sparsity": 0.0015070478508261826, "compression/movement_sparsity/model_sparsity": 0.0014552761209288328, "compression_loss": 3.6976757049560547, "distillation_loss": 0.5974250435829163, "epoch": 2.04, "learning_rate": 4.424720578566733e-05, "loss": 4.2301, "step": 2408, "task_loss": 0.9816263914108276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03505518224950899, "compression/movement_sparsity/importance_threshold": -0.00686374721428426, "compression/movement_sparsity/linear_layer_sparsity": 0.001520784491942697, "compression/movement_sparsity/model_sparsity": 0.0014685408661641438, "compression_loss": 3.7867698669433594, "distillation_loss": 0.6589679718017578, "epoch": 2.04, "learning_rate": 4.4242509627125013e-05, "loss": 4.3039, "step": 2409, "task_loss": 0.3757113218307495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03587968646387196, "compression/movement_sparsity/importance_threshold": -0.006857882434868488, "compression/movement_sparsity/linear_layer_sparsity": 0.0015410078802531204, "compression/movement_sparsity/model_sparsity": 0.001488069518871685, "compression_loss": 3.875814437866211, "distillation_loss": 0.4095323085784912, "epoch": 2.04, "learning_rate": 4.42378134685827e-05, "loss": 4.2843, "step": 2410, "task_loss": 0.3674198389053345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0367037208752764, "compression/movement_sparsity/importance_threshold": -0.006852020997207278, "compression/movement_sparsity/linear_layer_sparsity": 0.0015953820846726558, "compression/movement_sparsity/model_sparsity": 0.0015405758020947912, "compression_loss": 3.9648020267486572, "distillation_loss": 0.3195588290691376, "epoch": 2.04, "learning_rate": 4.423311731004039e-05, "loss": 4.4279, "step": 2411, "task_loss": 0.9604442119598389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03752728561760765, "compression/movement_sparsity/importance_threshold": -0.006846162900348293, "compression/movement_sparsity/linear_layer_sparsity": 0.0016592956232008816, "compression/movement_sparsity/model_sparsity": 0.0016022937139535298, "compression_loss": 4.05374002456665, "distillation_loss": 0.7042628526687622, "epoch": 2.04, "learning_rate": 4.422842115149808e-05, "loss": 4.521, "step": 2412, "task_loss": 0.5779948234558105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.038350380824750396, "compression/movement_sparsity/importance_threshold": -0.006840308143339195, "compression/movement_sparsity/linear_layer_sparsity": 0.0016955450928139051, "compression/movement_sparsity/model_sparsity": 0.0016372979027689338, "compression_loss": 4.1426239013671875, "distillation_loss": 0.3178732991218567, "epoch": 2.04, "learning_rate": 4.422372499295576e-05, "loss": 4.69, "step": 2413, "task_loss": 0.4287355840206146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03917300663059042, "compression/movement_sparsity/importance_threshold": -0.006834456725227644, "compression/movement_sparsity/linear_layer_sparsity": 0.0017270248953725835, "compression/movement_sparsity/model_sparsity": 0.0016676962772665216, "compression_loss": 4.231451511383057, "distillation_loss": 0.7352632284164429, "epoch": 2.04, "learning_rate": 4.421902883441345e-05, "loss": 4.7138, "step": 2414, "task_loss": 0.930866539478302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03999516316901264, "compression/movement_sparsity/importance_threshold": -0.006828608645061302, "compression/movement_sparsity/linear_layer_sparsity": 0.0017682348187221262, "compression/movement_sparsity/model_sparsity": 0.0017074905129724545, "compression_loss": 4.320225238800049, "distillation_loss": 0.5509814023971558, "epoch": 2.04, "learning_rate": 4.421433267587114e-05, "loss": 4.7164, "step": 2415, "task_loss": 0.37887588143348694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04081685057390205, "compression/movement_sparsity/importance_threshold": -0.0068227639018878326, "compression/movement_sparsity/linear_layer_sparsity": 0.001809063168707321, "compression/movement_sparsity/model_sparsity": 0.0017469162835329621, "compression_loss": 4.4089436531066895, "distillation_loss": 0.48715072870254517, "epoch": 2.04, "learning_rate": 4.420963651732883e-05, "loss": 4.9244, "step": 2416, "task_loss": 0.7425234317779541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.041638068979143905, "compression/movement_sparsity/importance_threshold": -0.0068169224947548955, "compression/movement_sparsity/linear_layer_sparsity": 0.0018472205051420826, "compression/movement_sparsity/model_sparsity": 0.0017837627980754928, "compression_loss": 4.497612953186035, "distillation_loss": 0.4352259039878845, "epoch": 2.04, "learning_rate": 4.420494035878652e-05, "loss": 4.955, "step": 2417, "task_loss": 1.043090581893921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04245881851862332, "compression/movement_sparsity/importance_threshold": -0.0068110844227101544, "compression/movement_sparsity/linear_layer_sparsity": 0.0018849962682124966, "compression/movement_sparsity/model_sparsity": 0.001820240847472598, "compression_loss": 4.586228370666504, "distillation_loss": 0.4168151021003723, "epoch": 2.04, "learning_rate": 4.4200244200244204e-05, "loss": 5.1609, "step": 2418, "task_loss": 0.3300071060657501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04327909932622542, "compression/movement_sparsity/importance_threshold": -0.006805249684801271, "compression/movement_sparsity/linear_layer_sparsity": 0.001903502576383356, "compression/movement_sparsity/model_sparsity": 0.0018381114070257253, "compression_loss": 4.674788475036621, "distillation_loss": 0.5442335605621338, "epoch": 2.04, "learning_rate": 4.419554804170189e-05, "loss": 5.2098, "step": 2419, "task_loss": 0.23393391072750092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04409891153583545, "compression/movement_sparsity/importance_threshold": -0.006799418280075906, "compression/movement_sparsity/linear_layer_sparsity": 0.0019372718191281201, "compression/movement_sparsity/model_sparsity": 0.0018707205723958648, "compression_loss": 4.763298988342285, "distillation_loss": 0.5652453899383545, "epoch": 2.05, "learning_rate": 4.419085188315958e-05, "loss": 5.2655, "step": 2420, "task_loss": 0.7283906936645508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.044918255281338304, "compression/movement_sparsity/importance_threshold": -0.006793590207581722, "compression/movement_sparsity/linear_layer_sparsity": 0.0019805923201492103, "compression/movement_sparsity/model_sparsity": 0.0019125528809374316, "compression_loss": 4.851759433746338, "distillation_loss": 0.3847142457962036, "epoch": 2.05, "learning_rate": 4.418615572461727e-05, "loss": 5.3616, "step": 2421, "task_loss": 0.6755742430686951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04573713069661933, "compression/movement_sparsity/importance_threshold": -0.006787765466366381, "compression/movement_sparsity/linear_layer_sparsity": 0.002011702473511177, "compression/movement_sparsity/model_sparsity": 0.0019425943048253884, "compression_loss": 4.940167427062988, "distillation_loss": 0.5764294862747192, "epoch": 2.05, "learning_rate": 4.418145956607495e-05, "loss": 5.3885, "step": 2422, "task_loss": 1.3675988912582397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04655553791556333, "compression/movement_sparsity/importance_threshold": -0.006781944055477547, "compression/movement_sparsity/linear_layer_sparsity": 0.002118924588892857, "compression/movement_sparsity/model_sparsity": 0.0020461330106898994, "compression_loss": 5.028520107269287, "distillation_loss": 0.589012086391449, "epoch": 2.05, "learning_rate": 4.417676340753264e-05, "loss": 5.4714, "step": 2423, "task_loss": 0.4511723220348358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04737347707205597, "compression/movement_sparsity/importance_threshold": -0.006776125973962877, "compression/movement_sparsity/linear_layer_sparsity": 0.002177877673684564, "compression/movement_sparsity/model_sparsity": 0.002103060875658109, "compression_loss": 5.1168212890625, "distillation_loss": 0.3648529052734375, "epoch": 2.05, "learning_rate": 4.417206724899033e-05, "loss": 5.664, "step": 2424, "task_loss": 0.3508216142654419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04819094829998227, "compression/movement_sparsity/importance_threshold": -0.006770311220870035, "compression/movement_sparsity/linear_layer_sparsity": 0.0022480871727245254, "compression/movement_sparsity/model_sparsity": 0.002170858462416365, "compression_loss": 5.205070495605469, "distillation_loss": 0.6499072909355164, "epoch": 2.05, "learning_rate": 4.4167371090448015e-05, "loss": 5.8081, "step": 2425, "task_loss": 0.9414442181587219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04900795173322714, "compression/movement_sparsity/importance_threshold": -0.006764499795246684, "compression/movement_sparsity/linear_layer_sparsity": 0.002239692558708878, "compression/movement_sparsity/model_sparsity": 0.0021627522292170086, "compression_loss": 5.2932634353637695, "distillation_loss": 0.7931656241416931, "epoch": 2.05, "learning_rate": 4.41626749319057e-05, "loss": 5.8127, "step": 2426, "task_loss": 0.3164198696613312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04982448750567581, "compression/movement_sparsity/importance_threshold": -0.0067586916961404845, "compression/movement_sparsity/linear_layer_sparsity": 0.002318869031811008, "compression/movement_sparsity/model_sparsity": 0.0022392087468927594, "compression_loss": 5.381406307220459, "distillation_loss": 0.3174822926521301, "epoch": 2.05, "learning_rate": 4.415797877336339e-05, "loss": 5.7329, "step": 2427, "task_loss": 0.4446188509464264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05064055575121351, "compression/movement_sparsity/importance_threshold": -0.006752886922599099, "compression/movement_sparsity/linear_layer_sparsity": 0.0023978547182309648, "compression/movement_sparsity/model_sparsity": 0.0023154810319957974, "compression_loss": 5.469493389129639, "distillation_loss": 0.38790449500083923, "epoch": 2.05, "learning_rate": 4.415328261482108e-05, "loss": 5.8809, "step": 2428, "task_loss": 0.14009346067905426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05145615660372527, "compression/movement_sparsity/importance_threshold": -0.00674708547367019, "compression/movement_sparsity/linear_layer_sparsity": 0.002482373218433962, "compression/movement_sparsity/model_sparsity": 0.002397096061707503, "compression_loss": 5.557536602020264, "distillation_loss": 0.5907278060913086, "epoch": 2.05, "learning_rate": 4.414858645627877e-05, "loss": 6.1285, "step": 2429, "task_loss": 0.85813307762146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05227129019709631, "compression/movement_sparsity/importance_threshold": -0.006741287348401417, "compression/movement_sparsity/linear_layer_sparsity": 0.0024770311913330954, "compression/movement_sparsity/model_sparsity": 0.0023919375496715485, "compression_loss": 5.645522594451904, "distillation_loss": 0.5761442184448242, "epoch": 2.05, "learning_rate": 4.4143890297736454e-05, "loss": 6.1872, "step": 2430, "task_loss": 1.1426481008529663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05308595666521165, "compression/movement_sparsity/importance_threshold": -0.006735492545840445, "compression/movement_sparsity/linear_layer_sparsity": 0.002520148981504376, "compression/movement_sparsity/model_sparsity": 0.002433574111104608, "compression_loss": 5.733456134796143, "distillation_loss": 0.35532650351524353, "epoch": 2.05, "learning_rate": 4.413919413919414e-05, "loss": 6.231, "step": 2431, "task_loss": 0.5929766297340393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.053900156141956646, "compression/movement_sparsity/importance_threshold": -0.0067297010650349326, "compression/movement_sparsity/linear_layer_sparsity": 0.002603713548296504, "compression/movement_sparsity/model_sparsity": 0.0025142679779527497, "compression_loss": 5.821337699890137, "distillation_loss": 0.6115788221359253, "epoch": 2.06, "learning_rate": 4.4134497980651826e-05, "loss": 6.3934, "step": 2432, "task_loss": 0.8109793066978455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.054713888761215856, "compression/movement_sparsity/importance_threshold": -0.0067239129050325475, "compression/movement_sparsity/linear_layer_sparsity": 0.002662475846406037, "compression/movement_sparsity/model_sparsity": 0.002571011610348247, "compression_loss": 5.909163475036621, "distillation_loss": 0.4654971957206726, "epoch": 2.06, "learning_rate": 4.412980182210952e-05, "loss": 6.4128, "step": 2433, "task_loss": 0.997048020362854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0555271546568753, "compression/movement_sparsity/importance_threshold": -0.006718128064880944, "compression/movement_sparsity/linear_layer_sparsity": 0.0027195210643760053, "compression/movement_sparsity/model_sparsity": 0.00262609714958933, "compression_loss": 5.996937274932861, "distillation_loss": 0.4822511672973633, "epoch": 2.06, "learning_rate": 4.4125105663567206e-05, "loss": 6.4682, "step": 2434, "task_loss": 0.5110466480255127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.056339953962819544, "compression/movement_sparsity/importance_threshold": -0.006712346543627788, "compression/movement_sparsity/linear_layer_sparsity": 0.0027748611263740458, "compression/movement_sparsity/model_sparsity": 0.002679536110211794, "compression_loss": 6.084654808044434, "distillation_loss": 0.5670910477638245, "epoch": 2.06, "learning_rate": 4.412040950502489e-05, "loss": 6.5911, "step": 2435, "task_loss": 1.2059032917022705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05715228681293394, "compression/movement_sparsity/importance_threshold": -0.006706568340320741, "compression/movement_sparsity/linear_layer_sparsity": 0.0028507942258792214, "compression/movement_sparsity/model_sparsity": 0.0027528606741514298, "compression_loss": 6.172325611114502, "distillation_loss": 0.9450353384017944, "epoch": 2.06, "learning_rate": 4.411571334648258e-05, "loss": 6.7745, "step": 2436, "task_loss": 1.0160691738128662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05796415334110361, "compression/movement_sparsity/importance_threshold": -0.006700793454007463, "compression/movement_sparsity/linear_layer_sparsity": 0.0029471415003769945, "compression/movement_sparsity/model_sparsity": 0.002845898123371319, "compression_loss": 6.259943962097168, "distillation_loss": 0.7630011439323425, "epoch": 2.06, "learning_rate": 4.4111017187940265e-05, "loss": 6.8131, "step": 2437, "task_loss": 1.2771215438842773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.058775553681213566, "compression/movement_sparsity/importance_threshold": -0.006695021883735618, "compression/movement_sparsity/linear_layer_sparsity": 0.0029936934508274036, "compression/movement_sparsity/model_sparsity": 0.0028908508711132067, "compression_loss": 6.347505569458008, "distillation_loss": 0.4045135974884033, "epoch": 2.06, "learning_rate": 4.410632102939796e-05, "loss": 6.8475, "step": 2438, "task_loss": 0.535200297832489 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05958648796714905, "compression/movement_sparsity/importance_threshold": -0.006689253628552869, "compression/movement_sparsity/linear_layer_sparsity": 0.003114664131493234, "compression/movement_sparsity/model_sparsity": 0.003007665836748823, "compression_loss": 6.435020446777344, "distillation_loss": 0.454659640789032, "epoch": 2.06, "learning_rate": 4.410162487085564e-05, "loss": 7.014, "step": 2439, "task_loss": 0.5204482078552246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06039695633279518, "compression/movement_sparsity/importance_threshold": -0.006683488687506874, "compression/movement_sparsity/linear_layer_sparsity": 0.003177254087413879, "compression/movement_sparsity/model_sparsity": 0.0030681056351343676, "compression_loss": 6.5224785804748535, "distillation_loss": 0.5650800466537476, "epoch": 2.06, "learning_rate": 4.409692871231333e-05, "loss": 7.0478, "step": 2440, "task_loss": 0.47532305121421814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06120695891203709, "compression/movement_sparsity/importance_threshold": -0.006677727059645298, "compression/movement_sparsity/linear_layer_sparsity": 0.003332363660021185, "compression/movement_sparsity/model_sparsity": 0.0032178867167497543, "compression_loss": 6.609887599945068, "distillation_loss": 0.24937833845615387, "epoch": 2.06, "learning_rate": 4.409223255377102e-05, "loss": 6.9277, "step": 2441, "task_loss": 0.193439781665802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06201649583876001, "compression/movement_sparsity/importance_threshold": -0.006671968744015802, "compression/movement_sparsity/linear_layer_sparsity": 0.003360027728936387, "compression/movement_sparsity/model_sparsity": 0.003244600439793089, "compression_loss": 6.697245121002197, "distillation_loss": 0.41388922929763794, "epoch": 2.06, "learning_rate": 4.408753639522871e-05, "loss": 7.2005, "step": 2442, "task_loss": 0.5878494381904602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06282556724684862, "compression/movement_sparsity/importance_threshold": -0.006666213739666049, "compression/movement_sparsity/linear_layer_sparsity": 0.003491491677121777, "compression/movement_sparsity/model_sparsity": 0.0033715481969279013, "compression_loss": 6.784547805786133, "distillation_loss": 0.4233899712562561, "epoch": 2.07, "learning_rate": 4.408284023668639e-05, "loss": 7.1326, "step": 2443, "task_loss": 0.36933910846710205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06363417327018872, "compression/movement_sparsity/importance_threshold": -0.006660462045643698, "compression/movement_sparsity/linear_layer_sparsity": 0.003600621659325195, "compression/movement_sparsity/model_sparsity": 0.0034769292285195385, "compression_loss": 6.871796131134033, "distillation_loss": 0.8192423582077026, "epoch": 2.07, "learning_rate": 4.4078144078144076e-05, "loss": 7.4548, "step": 2444, "task_loss": 0.9345294237136841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0644423140426652, "compression/movement_sparsity/importance_threshold": -0.006654713660996412, "compression/movement_sparsity/linear_layer_sparsity": 0.00376870472632032, "compression/movement_sparsity/model_sparsity": 0.0036392381250793856, "compression_loss": 6.958992958068848, "distillation_loss": 0.33281075954437256, "epoch": 2.07, "learning_rate": 4.407344791960177e-05, "loss": 7.3893, "step": 2445, "task_loss": 0.09758565574884415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0652499896981632, "compression/movement_sparsity/importance_threshold": -0.006648968584771853, "compression/movement_sparsity/linear_layer_sparsity": 0.003926115663281347, "compression/movement_sparsity/model_sparsity": 0.0037912415121031185, "compression_loss": 7.046136379241943, "distillation_loss": 0.43842393159866333, "epoch": 2.07, "learning_rate": 4.4068751761059455e-05, "loss": 7.6257, "step": 2446, "task_loss": 0.543931245803833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06605720037056773, "compression/movement_sparsity/importance_threshold": -0.006643226816017684, "compression/movement_sparsity/linear_layer_sparsity": 0.004034661361270609, "compression/movement_sparsity/model_sparsity": 0.0038960583314408236, "compression_loss": 7.1332316398620605, "distillation_loss": 0.6419340968132019, "epoch": 2.07, "learning_rate": 4.406405560251715e-05, "loss": 7.6129, "step": 2447, "task_loss": 0.6952275037765503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06686394619376412, "compression/movement_sparsity/importance_threshold": -0.006637488353781565, "compression/movement_sparsity/linear_layer_sparsity": 0.00418425004426251, "compression/movement_sparsity/model_sparsity": 0.004040508182983338, "compression_loss": 7.220275402069092, "distillation_loss": 0.9645683765411377, "epoch": 2.07, "learning_rate": 4.405935944397483e-05, "loss": 7.8226, "step": 2448, "task_loss": 1.0399248600006104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06767022730163741, "compression/movement_sparsity/importance_threshold": -0.00663175319711116, "compression/movement_sparsity/linear_layer_sparsity": 0.00437560908648284, "compression/movement_sparsity/model_sparsity": 0.004225293453414128, "compression_loss": 7.307265758514404, "distillation_loss": 0.6143982410430908, "epoch": 2.07, "learning_rate": 4.405466328543252e-05, "loss": 7.8643, "step": 2449, "task_loss": 1.0221641063690186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06847604382807271, "compression/movement_sparsity/importance_threshold": -0.006626021345054129, "compression/movement_sparsity/linear_layer_sparsity": 0.004525567418671453, "compression/movement_sparsity/model_sparsity": 0.004370100255566273, "compression_loss": 7.394208908081055, "distillation_loss": 0.5359805226325989, "epoch": 2.07, "learning_rate": 4.404996712689021e-05, "loss": 7.9445, "step": 2450, "task_loss": 1.0360839366912842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06928139590695526, "compression/movement_sparsity/importance_threshold": -0.006620292796658133, "compression/movement_sparsity/linear_layer_sparsity": 0.004671137657170069, "compression/movement_sparsity/model_sparsity": 0.0045106697085460275, "compression_loss": 7.481098651885986, "distillation_loss": 0.44888514280319214, "epoch": 2.07, "learning_rate": 4.4045270968347894e-05, "loss": 7.7936, "step": 2451, "task_loss": 0.24398085474967957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07008628367217007, "compression/movement_sparsity/importance_threshold": -0.0066145675509708375, "compression/movement_sparsity/linear_layer_sparsity": 0.0048361681372504125, "compression/movement_sparsity/model_sparsity": 0.004670030883942472, "compression_loss": 7.56793212890625, "distillation_loss": 0.4660545587539673, "epoch": 2.07, "learning_rate": 4.404057480980558e-05, "loss": 7.9525, "step": 2452, "task_loss": 0.7467697858810425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07089070725760216, "compression/movement_sparsity/importance_threshold": -0.006608845607039903, "compression/movement_sparsity/linear_layer_sparsity": 0.005048513714509861, "compression/movement_sparsity/model_sparsity": 0.004875081737371655, "compression_loss": 7.654711723327637, "distillation_loss": 0.42556673288345337, "epoch": 2.07, "learning_rate": 4.4035878651263267e-05, "loss": 8.1926, "step": 2453, "task_loss": 0.9737802743911743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0716946667971371, "compression/movement_sparsity/importance_threshold": -0.006603126963912989, "compression/movement_sparsity/linear_layer_sparsity": 0.005169472471008056, "compression/movement_sparsity/model_sparsity": 0.004991885188471476, "compression_loss": 7.741440296173096, "distillation_loss": 0.5071057677268982, "epoch": 2.07, "learning_rate": 4.403118249272096e-05, "loss": 8.2845, "step": 2454, "task_loss": 0.4986054301261902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07249816242465978, "compression/movement_sparsity/importance_threshold": -0.006597411620637759, "compression/movement_sparsity/linear_layer_sparsity": 0.0053041797927904, "compression/movement_sparsity/model_sparsity": 0.005121964899342404, "compression_loss": 7.8281145095825195, "distillation_loss": 0.33635836839675903, "epoch": 2.08, "learning_rate": 4.4026486334178646e-05, "loss": 8.2719, "step": 2455, "task_loss": 0.5483292937278748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07330119427405535, "compression/movement_sparsity/importance_threshold": -0.0065916995762618745, "compression/movement_sparsity/linear_layer_sparsity": 0.005460255222976211, "compression/movement_sparsity/model_sparsity": 0.005272678658357149, "compression_loss": 7.914743900299072, "distillation_loss": 0.41755783557891846, "epoch": 2.08, "learning_rate": 4.402179017563633e-05, "loss": 8.3935, "step": 2456, "task_loss": 0.8739609718322754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07410376247920902, "compression/movement_sparsity/importance_threshold": -0.006585990829832997, "compression/movement_sparsity/linear_layer_sparsity": 0.005565760258218327, "compression/movement_sparsity/model_sparsity": 0.005374559271067245, "compression_loss": 8.001320838928223, "distillation_loss": 0.6115769743919373, "epoch": 2.08, "learning_rate": 4.401709401709402e-05, "loss": 8.4242, "step": 2457, "task_loss": 0.9703568816184998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07490586717400582, "compression/movement_sparsity/importance_threshold": -0.0065802853803987905, "compression/movement_sparsity/linear_layer_sparsity": 0.005690343961677824, "compression/movement_sparsity/model_sparsity": 0.005494863141048608, "compression_loss": 8.08784294128418, "distillation_loss": 0.3034594655036926, "epoch": 2.08, "learning_rate": 4.4012397858551705e-05, "loss": 8.5538, "step": 2458, "task_loss": 0.43958207964897156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07570750849233099, "compression/movement_sparsity/importance_threshold": -0.006574583227006913, "compression/movement_sparsity/linear_layer_sparsity": 0.005872175593957099, "compression/movement_sparsity/model_sparsity": 0.00567044829737956, "compression_loss": 8.17431354522705, "distillation_loss": 0.3621300756931305, "epoch": 2.08, "learning_rate": 4.40077017000094e-05, "loss": 8.63, "step": 2459, "task_loss": 0.8015866875648499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07650868656806953, "compression/movement_sparsity/importance_threshold": -0.006568884368705031, "compression/movement_sparsity/linear_layer_sparsity": 0.006021955063631174, "compression/movement_sparsity/model_sparsity": 0.005815082381494787, "compression_loss": 8.260735511779785, "distillation_loss": 0.5284761786460876, "epoch": 2.08, "learning_rate": 4.400300554146708e-05, "loss": 8.7985, "step": 2460, "task_loss": 0.8613170385360718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07730940153510668, "compression/movement_sparsity/importance_threshold": -0.006563188804540803, "compression/movement_sparsity/linear_layer_sparsity": 0.006097697376454176, "compression/movement_sparsity/model_sparsity": 0.00588822271286171, "compression_loss": 8.3471040725708, "distillation_loss": 0.5273487567901611, "epoch": 2.08, "learning_rate": 4.399830938292477e-05, "loss": 8.8541, "step": 2461, "task_loss": 0.3633285164833069 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07810965352732757, "compression/movement_sparsity/importance_threshold": -0.006557496533561892, "compression/movement_sparsity/linear_layer_sparsity": 0.006154742594424145, "compression/movement_sparsity/model_sparsity": 0.005943308252102793, "compression_loss": 8.433415412902832, "distillation_loss": 0.3879525661468506, "epoch": 2.08, "learning_rate": 4.399361322438246e-05, "loss": 9.0803, "step": 2462, "task_loss": 0.12355247884988785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0789094426786171, "compression/movement_sparsity/importance_threshold": -0.006551807554815962, "compression/movement_sparsity/linear_layer_sparsity": 0.00638349582635054, "compression/movement_sparsity/model_sparsity": 0.006164203106785264, "compression_loss": 8.519676208496094, "distillation_loss": 0.4563162624835968, "epoch": 2.08, "learning_rate": 4.3988917065840143e-05, "loss": 9.1859, "step": 2463, "task_loss": 0.48732689023017883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07970876912286096, "compression/movement_sparsity/importance_threshold": -0.00654612186735067, "compression/movement_sparsity/linear_layer_sparsity": 0.006624650192618234, "compression/movement_sparsity/model_sparsity": 0.006397073078694057, "compression_loss": 8.605894088745117, "distillation_loss": 0.6254844665527344, "epoch": 2.08, "learning_rate": 4.398422090729784e-05, "loss": 8.9954, "step": 2464, "task_loss": 0.1755497306585312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08050763299394392, "compression/movement_sparsity/importance_threshold": -0.006540439470213681, "compression/movement_sparsity/linear_layer_sparsity": 0.00681449486554881, "compression/movement_sparsity/model_sparsity": 0.00658039600307894, "compression_loss": 8.692054748535156, "distillation_loss": 0.425329327583313, "epoch": 2.08, "learning_rate": 4.3979524748755516e-05, "loss": 9.2972, "step": 2465, "task_loss": 0.06090657040476799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08130603442575124, "compression/movement_sparsity/importance_threshold": -0.006534760362452655, "compression/movement_sparsity/linear_layer_sparsity": 0.007026840442808258, "compression/movement_sparsity/model_sparsity": 0.006785446856508124, "compression_loss": 8.778165817260742, "distillation_loss": 0.6477507948875427, "epoch": 2.08, "learning_rate": 4.397482859021321e-05, "loss": 9.2139, "step": 2466, "task_loss": 0.5576995015144348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08210397355216792, "compression/movement_sparsity/importance_threshold": -0.006529084543115257, "compression/movement_sparsity/linear_layer_sparsity": 0.007109641862871691, "compression/movement_sparsity/model_sparsity": 0.0068654037930654145, "compression_loss": 8.864222526550293, "distillation_loss": 0.307961106300354, "epoch": 2.09, "learning_rate": 4.3970132431670896e-05, "loss": 9.4084, "step": 2467, "task_loss": 0.6517997980117798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0829014505070792, "compression/movement_sparsity/importance_threshold": -0.006523412011249146, "compression/movement_sparsity/linear_layer_sparsity": 0.007276198636409425, "compression/movement_sparsity/model_sparsity": 0.00702623882904356, "compression_loss": 8.950233459472656, "distillation_loss": 0.4365618824958801, "epoch": 2.09, "learning_rate": 4.396543627312858e-05, "loss": 9.4469, "step": 2468, "task_loss": 1.3606501817703247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0836984654243702, "compression/movement_sparsity/importance_threshold": -0.006517742765901984, "compression/movement_sparsity/linear_layer_sparsity": 0.007487399493575831, "compression/movement_sparsity/model_sparsity": 0.007230184287036467, "compression_loss": 9.036184310913086, "distillation_loss": 0.4338088929653168, "epoch": 2.09, "learning_rate": 4.396074011458627e-05, "loss": 9.4166, "step": 2469, "task_loss": 0.08528230339288712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08449501843792617, "compression/movement_sparsity/importance_threshold": -0.006512076806121435, "compression/movement_sparsity/linear_layer_sparsity": 0.0076360223189892275, "compression/movement_sparsity/model_sparsity": 0.007373701461179623, "compression_loss": 9.12208080291748, "distillation_loss": 0.6494704484939575, "epoch": 2.09, "learning_rate": 4.3956043956043955e-05, "loss": 9.6886, "step": 2470, "task_loss": 0.8184638023376465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.085291109681632, "compression/movement_sparsity/importance_threshold": -0.006506414130955159, "compression/movement_sparsity/linear_layer_sparsity": 0.007781210984123496, "compression/movement_sparsity/model_sparsity": 0.007513902449013952, "compression_loss": 9.207924842834473, "distillation_loss": 0.27752000093460083, "epoch": 2.09, "learning_rate": 4.395134779750165e-05, "loss": 9.6767, "step": 2471, "task_loss": 1.1015565395355225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08608673928937305, "compression/movement_sparsity/importance_threshold": -0.006500754739450818, "compression/movement_sparsity/linear_layer_sparsity": 0.00800884334429212, "compression/movement_sparsity/model_sparsity": 0.007733714937331736, "compression_loss": 9.293724060058594, "distillation_loss": 0.7602990865707397, "epoch": 2.09, "learning_rate": 4.3946651638959334e-05, "loss": 9.9297, "step": 2472, "task_loss": 0.2723066210746765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08688190739503443, "compression/movement_sparsity/importance_threshold": -0.006495098630656075, "compression/movement_sparsity/linear_layer_sparsity": 0.008150037413268375, "compression/movement_sparsity/model_sparsity": 0.007870058555674894, "compression_loss": 9.379473686218262, "distillation_loss": 0.25742965936660767, "epoch": 2.09, "learning_rate": 4.394195548041702e-05, "loss": 9.8176, "step": 2473, "task_loss": 0.9002591371536255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08767661413250083, "compression/movement_sparsity/importance_threshold": -0.006489445803618593, "compression/movement_sparsity/linear_layer_sparsity": 0.008400540326962585, "compression/movement_sparsity/model_sparsity": 0.008111955923646606, "compression_loss": 9.465164184570312, "distillation_loss": 0.5517396330833435, "epoch": 2.09, "learning_rate": 4.393725932187471e-05, "loss": 9.9922, "step": 2474, "task_loss": 0.8314756155014038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08847085963565826, "compression/movement_sparsity/importance_threshold": -0.006483796257386028, "compression/movement_sparsity/linear_layer_sparsity": 0.008637700097072263, "compression/movement_sparsity/model_sparsity": 0.008340968526064228, "compression_loss": 9.55080509185791, "distillation_loss": 0.4952247142791748, "epoch": 2.09, "learning_rate": 4.393256316333239e-05, "loss": 10.1616, "step": 2475, "task_loss": 0.6142159700393677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0892646440383913, "compression/movement_sparsity/importance_threshold": -0.006478149991006048, "compression/movement_sparsity/linear_layer_sparsity": 0.008836499819897372, "compression/movement_sparsity/model_sparsity": 0.008532938866830812, "compression_loss": 9.636401176452637, "distillation_loss": 0.5077518224716187, "epoch": 2.09, "learning_rate": 4.3927867004790086e-05, "loss": 10.1363, "step": 2476, "task_loss": 0.2877042591571808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09005796747458517, "compression/movement_sparsity/importance_threshold": -0.006472507003526312, "compression/movement_sparsity/linear_layer_sparsity": 0.009079383190472266, "compression/movement_sparsity/model_sparsity": 0.008767478446429814, "compression_loss": 9.721948623657227, "distillation_loss": 0.7514311075210571, "epoch": 2.09, "learning_rate": 4.3923170846247766e-05, "loss": 10.2435, "step": 2477, "task_loss": 1.2035338878631592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.090850830078125, "compression/movement_sparsity/importance_threshold": -0.006466867293994483, "compression/movement_sparsity/linear_layer_sparsity": 0.00917249901554072, "compression/movement_sparsity/model_sparsity": 0.008857395456449382, "compression_loss": 9.807450294494629, "distillation_loss": 0.3878116309642792, "epoch": 2.09, "learning_rate": 4.391847468770546e-05, "loss": 10.3313, "step": 2478, "task_loss": 0.8201970458030701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09164323198289603, "compression/movement_sparsity/importance_threshold": -0.006461230861458221, "compression/movement_sparsity/linear_layer_sparsity": 0.009430418761504437, "compression/movement_sparsity/model_sparsity": 0.009106454865685301, "compression_loss": 9.892892837524414, "distillation_loss": 0.5213336944580078, "epoch": 2.1, "learning_rate": 4.3913778529163145e-05, "loss": 10.4896, "step": 2479, "task_loss": 0.5990313291549683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09243517332278328, "compression/movement_sparsity/importance_threshold": -0.006455597704965191, "compression/movement_sparsity/linear_layer_sparsity": 0.009650586592733011, "compression/movement_sparsity/model_sparsity": 0.009319059254595701, "compression_loss": 9.978288650512695, "distillation_loss": 0.491018682718277, "epoch": 2.1, "learning_rate": 4.390908237062084e-05, "loss": 10.4593, "step": 2480, "task_loss": 1.2859872579574585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09322665423167198, "compression/movement_sparsity/importance_threshold": -0.006449967823563051, "compression/movement_sparsity/linear_layer_sparsity": 0.009845976003614263, "compression/movement_sparsity/model_sparsity": 0.009507736438125048, "compression_loss": 10.063633918762207, "distillation_loss": 0.46669119596481323, "epoch": 2.1, "learning_rate": 4.3904386212078525e-05, "loss": 10.5844, "step": 2481, "task_loss": 0.4867374300956726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09401767484344714, "compression/movement_sparsity/importance_threshold": -0.006444341216299467, "compression/movement_sparsity/linear_layer_sparsity": 0.010067300479103517, "compression/movement_sparsity/model_sparsity": 0.009721457737007519, "compression_loss": 10.14892578125, "distillation_loss": 0.4115389883518219, "epoch": 2.1, "learning_rate": 4.389969005353621e-05, "loss": 10.7386, "step": 2482, "task_loss": 0.9437954425811768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09480823529199378, "compression/movement_sparsity/importance_threshold": -0.006438717882222099, "compression/movement_sparsity/linear_layer_sparsity": 0.010377304989300683, "compression/movement_sparsity/model_sparsity": 0.01002081263859399, "compression_loss": 10.234180450439453, "distillation_loss": 0.7332837581634521, "epoch": 2.1, "learning_rate": 4.38949938949939e-05, "loss": 10.7518, "step": 2483, "task_loss": 0.3710191547870636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09559833571119758, "compression/movement_sparsity/importance_threshold": -0.0064330978203786075, "compression/movement_sparsity/linear_layer_sparsity": 0.01050589521308583, "compression/movement_sparsity/model_sparsity": 0.01014498539260232, "compression_loss": 10.319371223449707, "distillation_loss": 0.5509597659111023, "epoch": 2.1, "learning_rate": 4.3890297736451584e-05, "loss": 10.8432, "step": 2484, "task_loss": 0.5044685006141663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09638797623494333, "compression/movement_sparsity/importance_threshold": -0.006427481029816655, "compression/movement_sparsity/linear_layer_sparsity": 0.010678378297938588, "compression/movement_sparsity/model_sparsity": 0.010311543152870351, "compression_loss": 10.404526710510254, "distillation_loss": 0.5014839172363281, "epoch": 2.1, "learning_rate": 4.388560157790928e-05, "loss": 10.9625, "step": 2485, "task_loss": 0.35728397965431213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09717715699711615, "compression/movement_sparsity/importance_threshold": -0.006421867509583904, "compression/movement_sparsity/linear_layer_sparsity": 0.010898355342484988, "compression/movement_sparsity/model_sparsity": 0.01052396330920804, "compression_loss": 10.489617347717285, "distillation_loss": 0.47611770033836365, "epoch": 2.1, "learning_rate": 4.3880905419366956e-05, "loss": 10.9974, "step": 2486, "task_loss": 0.9920874834060669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09796587813160129, "compression/movement_sparsity/importance_threshold": -0.006416257258728016, "compression/movement_sparsity/linear_layer_sparsity": 0.011016070725386228, "compression/movement_sparsity/model_sparsity": 0.010637634806571747, "compression_loss": 10.574664115905762, "distillation_loss": 0.6422837376594543, "epoch": 2.1, "learning_rate": 4.387620926082465e-05, "loss": 11.1041, "step": 2487, "task_loss": 0.9064741730690002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09875413977228376, "compression/movement_sparsity/importance_threshold": -0.0064106502762966535, "compression/movement_sparsity/linear_layer_sparsity": 0.011257046229139384, "compression/movement_sparsity/model_sparsity": 0.01087033206044362, "compression_loss": 10.659652709960938, "distillation_loss": 1.2515149116516113, "epoch": 2.1, "learning_rate": 4.3871513102282336e-05, "loss": 11.4236, "step": 2488, "task_loss": 2.163140296936035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09954194205304878, "compression/movement_sparsity/importance_threshold": -0.0064050465613374775, "compression/movement_sparsity/linear_layer_sparsity": 0.011436004137018417, "compression/movement_sparsity/model_sparsity": 0.01104314221364809, "compression_loss": 10.74459171295166, "distillation_loss": 0.7164523005485535, "epoch": 2.1, "learning_rate": 4.386681694374002e-05, "loss": 11.3929, "step": 2489, "task_loss": 2.0320963859558105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1003292851077815, "compression/movement_sparsity/importance_threshold": -0.0063994461128981505, "compression/movement_sparsity/linear_layer_sparsity": 0.011608844946900251, "compression/movement_sparsity/model_sparsity": 0.011210045409989959, "compression_loss": 10.829482078552246, "distillation_loss": 0.3273899555206299, "epoch": 2.1, "learning_rate": 4.386212078519771e-05, "loss": 11.2423, "step": 2490, "task_loss": 0.31333279609680176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10111616907036702, "compression/movement_sparsity/importance_threshold": -0.0063938489300263334, "compression/movement_sparsity/linear_layer_sparsity": 0.01187632787530793, "compression/movement_sparsity/model_sparsity": 0.011468339476933098, "compression_loss": 10.914313316345215, "distillation_loss": 0.4849093556404114, "epoch": 2.11, "learning_rate": 4.3857424626655395e-05, "loss": 11.4719, "step": 2491, "task_loss": 0.9567331075668335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10190259407469049, "compression/movement_sparsity/importance_threshold": -0.006388255011769689, "compression/movement_sparsity/linear_layer_sparsity": 0.01212207304611543, "compression/movement_sparsity/model_sparsity": 0.01170564254512279, "compression_loss": 10.9990873336792, "distillation_loss": 0.516119658946991, "epoch": 2.11, "learning_rate": 4.385272846811309e-05, "loss": 11.6178, "step": 2492, "task_loss": 1.4960143566131592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10268856025463713, "compression/movement_sparsity/importance_threshold": -0.006382664357175879, "compression/movement_sparsity/linear_layer_sparsity": 0.012438397365159604, "compression/movement_sparsity/model_sparsity": 0.012011100150680366, "compression_loss": 11.083808898925781, "distillation_loss": 0.5751230716705322, "epoch": 2.11, "learning_rate": 4.3848032309570774e-05, "loss": 11.5869, "step": 2493, "task_loss": 0.47563672065734863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10347406774409162, "compression/movement_sparsity/importance_threshold": -0.006377076965292567, "compression/movement_sparsity/linear_layer_sparsity": 0.012669440037272087, "compression/movement_sparsity/model_sparsity": 0.01223420579623539, "compression_loss": 11.168489456176758, "distillation_loss": 0.5364096164703369, "epoch": 2.11, "learning_rate": 4.384333615102846e-05, "loss": 11.7121, "step": 2494, "task_loss": 0.9344714283943176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10425911667693977, "compression/movement_sparsity/importance_threshold": -0.006371492835167411, "compression/movement_sparsity/linear_layer_sparsity": 0.012939582055062562, "compression/movement_sparsity/model_sparsity": 0.01249506760466071, "compression_loss": 11.253110885620117, "distillation_loss": 0.36371853947639465, "epoch": 2.11, "learning_rate": 4.383863999248615e-05, "loss": 11.8549, "step": 2495, "task_loss": 0.552375078201294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10504370718706657, "compression/movement_sparsity/importance_threshold": -0.006365911965848075, "compression/movement_sparsity/linear_layer_sparsity": 0.013270787735316293, "compression/movement_sparsity/model_sparsity": 0.012814895350889876, "compression_loss": 11.33768081665039, "distillation_loss": 0.5192692279815674, "epoch": 2.11, "learning_rate": 4.383394383394383e-05, "loss": 11.8876, "step": 2496, "task_loss": 0.25774580240249634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10582783940835694, "compression/movement_sparsity/importance_threshold": -0.00636033435638222, "compression/movement_sparsity/linear_layer_sparsity": 0.013531986627379873, "compression/movement_sparsity/model_sparsity": 0.013067121257469292, "compression_loss": 11.422198295593262, "distillation_loss": 0.6309471130371094, "epoch": 2.11, "learning_rate": 4.3829247675401526e-05, "loss": 11.9317, "step": 2497, "task_loss": 1.3160457611083984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1066115134746961, "compression/movement_sparsity/importance_threshold": -0.00635476000581751, "compression/movement_sparsity/linear_layer_sparsity": 0.013849646453199263, "compression/movement_sparsity/model_sparsity": 0.013373868491035858, "compression_loss": 11.506658554077148, "distillation_loss": 0.42837756872177124, "epoch": 2.11, "learning_rate": 4.382455151685921e-05, "loss": 11.9683, "step": 2498, "task_loss": 0.6965353488922119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1073947295199692, "compression/movement_sparsity/importance_threshold": -0.006349188913201604, "compression/movement_sparsity/linear_layer_sparsity": 0.014062182817140886, "compression/movement_sparsity/model_sparsity": 0.013579103577037755, "compression_loss": 11.591073989868164, "distillation_loss": 0.4134630262851715, "epoch": 2.11, "learning_rate": 4.38198553583169e-05, "loss": 11.9762, "step": 2499, "task_loss": 1.0188318490982056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10817748767806146, "compression/movement_sparsity/importance_threshold": -0.006343621077582165, "compression/movement_sparsity/linear_layer_sparsity": 0.014364388921704198, "compression/movement_sparsity/model_sparsity": 0.013870927972214597, "compression_loss": 11.675420761108398, "distillation_loss": 0.525282621383667, "epoch": 2.11, "learning_rate": 4.3815159199774585e-05, "loss": 12.2559, "step": 2500, "task_loss": 0.44454771280288696 }, { "epoch": 2.11, "eval_accuracy": 0.9032871287128713, "eval_loss": 12.012799263000488, "eval_runtime": 227.8983, "eval_samples_per_second": 110.795, "eval_steps_per_second": 0.869, "step": 2500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10895978808285789, "compression/movement_sparsity/importance_threshold": -0.006338056498006856, "compression/movement_sparsity/linear_layer_sparsity": 0.014659726705709253, "compression/movement_sparsity/model_sparsity": 0.014156119994773782, "compression_loss": 11.75971508026123, "distillation_loss": 0.28192323446273804, "epoch": 2.11, "learning_rate": 4.381046304123227e-05, "loss": 12.2157, "step": 2501, "task_loss": 0.22753363847732544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10974163086824362, "compression/movement_sparsity/importance_threshold": -0.0063324951735233385, "compression/movement_sparsity/linear_layer_sparsity": 0.01492551640231264, "compression/movement_sparsity/model_sparsity": 0.014412778997634096, "compression_loss": 11.843955039978027, "distillation_loss": 0.5874152183532715, "epoch": 2.11, "learning_rate": 4.3805766882689965e-05, "loss": 12.4217, "step": 2502, "task_loss": 0.466312050819397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.110523016168104, "compression/movement_sparsity/importance_threshold": -0.006326937103179272, "compression/movement_sparsity/linear_layer_sparsity": 0.015297920081748278, "compression/movement_sparsity/model_sparsity": 0.014772389465033401, "compression_loss": 11.928153038024902, "distillation_loss": 0.399452805519104, "epoch": 2.12, "learning_rate": 4.3801070724147645e-05, "loss": 12.3957, "step": 2503, "task_loss": 0.6898105144500732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1113039441163236, "compression/movement_sparsity/importance_threshold": -0.006321382286022323, "compression/movement_sparsity/linear_layer_sparsity": 0.01570734830169327, "compression/movement_sparsity/model_sparsity": 0.015167752566074753, "compression_loss": 12.012296676635742, "distillation_loss": 0.6872439384460449, "epoch": 2.12, "learning_rate": 4.379637456560534e-05, "loss": 12.6114, "step": 2504, "task_loss": 1.0822148323059082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11208441484678833, "compression/movement_sparsity/importance_threshold": -0.006315830721100149, "compression/movement_sparsity/linear_layer_sparsity": 0.015916832078720113, "compression/movement_sparsity/model_sparsity": 0.015370039930913246, "compression_loss": 12.096396446228027, "distillation_loss": 0.5254052877426147, "epoch": 2.12, "learning_rate": 4.3791678407063024e-05, "loss": 12.7054, "step": 2505, "task_loss": 0.5405011177062988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11286442849338296, "compression/movement_sparsity/importance_threshold": -0.006310282407460413, "compression/movement_sparsity/linear_layer_sparsity": 0.016242123371826456, "compression/movement_sparsity/model_sparsity": 0.01568415646738832, "compression_loss": 12.180438995361328, "distillation_loss": 0.6915881037712097, "epoch": 2.12, "learning_rate": 4.378698224852072e-05, "loss": 12.6872, "step": 2506, "task_loss": 0.8806161284446716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11364398518999275, "compression/movement_sparsity/importance_threshold": -0.006304737344150777, "compression/movement_sparsity/linear_layer_sparsity": 0.016588770849168627, "compression/movement_sparsity/model_sparsity": 0.016018895537471413, "compression_loss": 12.264431953430176, "distillation_loss": 0.47974878549575806, "epoch": 2.12, "learning_rate": 4.37822860899784e-05, "loss": 12.8268, "step": 2507, "task_loss": 0.3200993537902832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1144230850705027, "compression/movement_sparsity/importance_threshold": -0.006299195530218902, "compression/movement_sparsity/linear_layer_sparsity": 0.016914252928957145, "compression/movement_sparsity/model_sparsity": 0.0163331963065192, "compression_loss": 12.34838581085205, "distillation_loss": 0.6903285980224609, "epoch": 2.12, "learning_rate": 4.377758993143608e-05, "loss": 12.917, "step": 2508, "task_loss": 0.7361826300621033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11520172826879793, "compression/movement_sparsity/importance_threshold": -0.006293656964712453, "compression/movement_sparsity/linear_layer_sparsity": 0.017185933164372648, "compression/movement_sparsity/model_sparsity": 0.016595543490062016, "compression_loss": 12.432276725769043, "distillation_loss": 0.6904730200767517, "epoch": 2.12, "learning_rate": 4.3772893772893776e-05, "loss": 13.0039, "step": 2509, "task_loss": 0.7834062576293945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1159799149187637, "compression/movement_sparsity/importance_threshold": -0.0062881216466790886, "compression/movement_sparsity/linear_layer_sparsity": 0.017500743114127068, "compression/movement_sparsity/model_sparsity": 0.01689953874957369, "compression_loss": 12.516119003295898, "distillation_loss": 0.4822756052017212, "epoch": 2.12, "learning_rate": 4.376819761435146e-05, "loss": 12.9688, "step": 2510, "task_loss": 0.36765778064727783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1167576451542851, "compression/movement_sparsity/importance_threshold": -0.006282589575166472, "compression/movement_sparsity/linear_layer_sparsity": 0.01785350768946644, "compression/movement_sparsity/model_sparsity": 0.017240184776519382, "compression_loss": 12.599906921386719, "distillation_loss": 0.7292959094047546, "epoch": 2.12, "learning_rate": 4.3763501455809156e-05, "loss": 13.2828, "step": 2511, "task_loss": 1.1089866161346436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1175349191092473, "compression/movement_sparsity/importance_threshold": -0.006277060749222264, "compression/movement_sparsity/linear_layer_sparsity": 0.01809925286027394, "compression/movement_sparsity/model_sparsity": 0.017477487844709073, "compression_loss": 12.683643341064453, "distillation_loss": 0.6120069026947021, "epoch": 2.12, "learning_rate": 4.3758805297266835e-05, "loss": 13.3399, "step": 2512, "task_loss": 0.43475252389907837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11831173691753538, "compression/movement_sparsity/importance_threshold": -0.006271535167894128, "compression/movement_sparsity/linear_layer_sparsity": 0.018456596315985483, "compression/movement_sparsity/model_sparsity": 0.017822555453399874, "compression_loss": 12.767343521118164, "distillation_loss": 0.2155183106660843, "epoch": 2.12, "learning_rate": 4.375410913872453e-05, "loss": 13.2673, "step": 2513, "task_loss": 0.05217612907290459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11908809871303416, "compression/movement_sparsity/importance_threshold": -0.0062660128302297275, "compression/movement_sparsity/linear_layer_sparsity": 0.018644318487076872, "compression/movement_sparsity/model_sparsity": 0.018003828790413328, "compression_loss": 12.850996017456055, "distillation_loss": 0.44571420550346375, "epoch": 2.13, "learning_rate": 4.3749412980182215e-05, "loss": 13.3018, "step": 2514, "task_loss": 1.0539829730987549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11986400462962954, "compression/movement_sparsity/importance_threshold": -0.0062604937352767195, "compression/movement_sparsity/linear_layer_sparsity": 0.018943519701395947, "compression/movement_sparsity/model_sparsity": 0.018292751522569945, "compression_loss": 12.934588432312012, "distillation_loss": 0.6470510363578796, "epoch": 2.13, "learning_rate": 4.37447168216399e-05, "loss": 13.5502, "step": 2515, "task_loss": 0.6987337470054626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12063945480120619, "compression/movement_sparsity/importance_threshold": -0.006254977882082768, "compression/movement_sparsity/linear_layer_sparsity": 0.01928558829836595, "compression/movement_sparsity/model_sparsity": 0.01862306901090794, "compression_loss": 13.018125534057617, "distillation_loss": 1.1003007888793945, "epoch": 2.13, "learning_rate": 4.374002066309759e-05, "loss": 13.5767, "step": 2516, "task_loss": 0.8108305931091309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12141444936164936, "compression/movement_sparsity/importance_threshold": -0.006249465269695536, "compression/movement_sparsity/linear_layer_sparsity": 0.01972877383688807, "compression/movement_sparsity/model_sparsity": 0.019051029762783635, "compression_loss": 13.101611137390137, "distillation_loss": 0.7956573367118835, "epoch": 2.13, "learning_rate": 4.3735324504555274e-05, "loss": 13.6503, "step": 2517, "task_loss": 1.5217866897583008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12218898844484405, "compression/movement_sparsity/importance_threshold": -0.006243955897162684, "compression/movement_sparsity/linear_layer_sparsity": 0.020049498173789877, "compression/movement_sparsity/model_sparsity": 0.0193607362320494, "compression_loss": 13.18504810333252, "distillation_loss": 0.3575247824192047, "epoch": 2.13, "learning_rate": 4.373062834601297e-05, "loss": 13.5596, "step": 2518, "task_loss": 0.2199210226535797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12296307218467561, "compression/movement_sparsity/importance_threshold": -0.006238449763531875, "compression/movement_sparsity/linear_layer_sparsity": 0.020455683020137914, "compression/movement_sparsity/model_sparsity": 0.019752967379354638, "compression_loss": 13.268409729003906, "distillation_loss": 0.5014580488204956, "epoch": 2.13, "learning_rate": 4.372593218747065e-05, "loss": 13.7709, "step": 2519, "task_loss": 0.5799087882041931 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12373670071502896, "compression/movement_sparsity/importance_threshold": -0.00623294686785077, "compression/movement_sparsity/linear_layer_sparsity": 0.020753322168496693, "compression/movement_sparsity/model_sparsity": 0.02004038170732217, "compression_loss": 13.35171890258789, "distillation_loss": 0.40361714363098145, "epoch": 2.13, "learning_rate": 4.372123602892834e-05, "loss": 13.8495, "step": 2520, "task_loss": 0.2779461443424225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12450987416978943, "compression/movement_sparsity/importance_threshold": -0.006227447209167031, "compression/movement_sparsity/linear_layer_sparsity": 0.021223623264222766, "compression/movement_sparsity/model_sparsity": 0.020494526513594655, "compression_loss": 13.434976577758789, "distillation_loss": 0.4493398070335388, "epoch": 2.13, "learning_rate": 4.3716539870386026e-05, "loss": 14.1054, "step": 2521, "task_loss": 0.8593337535858154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12528259268284203, "compression/movement_sparsity/importance_threshold": -0.0062219507865283194, "compression/movement_sparsity/linear_layer_sparsity": 0.02158556944864175, "compression/movement_sparsity/model_sparsity": 0.020844038733102144, "compression_loss": 13.518192291259766, "distillation_loss": 0.5503358840942383, "epoch": 2.13, "learning_rate": 4.371184371184371e-05, "loss": 14.0871, "step": 2522, "task_loss": 1.1331590414047241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12605485638807157, "compression/movement_sparsity/importance_threshold": -0.006216457598982301, "compression/movement_sparsity/linear_layer_sparsity": 0.021967345523839176, "compression/movement_sparsity/model_sparsity": 0.021212699625635958, "compression_loss": 13.601358413696289, "distillation_loss": 0.6110410690307617, "epoch": 2.13, "learning_rate": 4.3707147553301405e-05, "loss": 14.1774, "step": 2523, "task_loss": 0.6087321639060974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12682666541936405, "compression/movement_sparsity/importance_threshold": -0.006210967645576631, "compression/movement_sparsity/linear_layer_sparsity": 0.022320122023346184, "compression/movement_sparsity/model_sparsity": 0.021553357167117447, "compression_loss": 13.68448257446289, "distillation_loss": 0.41959068179130554, "epoch": 2.13, "learning_rate": 4.370245139475909e-05, "loss": 14.2974, "step": 2524, "task_loss": 0.8679050207138062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12759801991060393, "compression/movement_sparsity/importance_threshold": -0.006205480925358975, "compression/movement_sparsity/linear_layer_sparsity": 0.02281617932116572, "compression/movement_sparsity/model_sparsity": 0.02203237337070614, "compression_loss": 13.767553329467773, "distillation_loss": 1.392124891281128, "epoch": 2.13, "learning_rate": 4.369775523621678e-05, "loss": 14.5156, "step": 2525, "task_loss": 1.527979850769043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12836891999567668, "compression/movement_sparsity/importance_threshold": -0.0061999974373769945, "compression/movement_sparsity/linear_layer_sparsity": 0.02316116933920651, "compression/movement_sparsity/model_sparsity": 0.02236551192031379, "compression_loss": 13.850586891174316, "distillation_loss": 0.6203687191009521, "epoch": 2.14, "learning_rate": 4.3693059077674464e-05, "loss": 14.4013, "step": 2526, "task_loss": 0.8628767728805542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1291393658084673, "compression/movement_sparsity/importance_threshold": -0.006194517180678351, "compression/movement_sparsity/linear_layer_sparsity": 0.023518512794918053, "compression/movement_sparsity/model_sparsity": 0.02271057952900459, "compression_loss": 13.933568954467773, "distillation_loss": 0.6539409160614014, "epoch": 2.14, "learning_rate": 4.368836291913215e-05, "loss": 14.5916, "step": 2527, "task_loss": 0.9376600980758667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12990935748286092, "compression/movement_sparsity/importance_threshold": -0.006189040154310706, "compression/movement_sparsity/linear_layer_sparsity": 0.023927738304013235, "compression/movement_sparsity/model_sparsity": 0.02310574688293744, "compression_loss": 14.016500473022461, "distillation_loss": 0.8362791538238525, "epoch": 2.14, "learning_rate": 4.3683666760589844e-05, "loss": 14.5751, "step": 2528, "task_loss": 0.7086493372917175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13067889515274267, "compression/movement_sparsity/importance_threshold": -0.006183566357321723, "compression/movement_sparsity/linear_layer_sparsity": 0.024290996146872166, "compression/movement_sparsity/model_sparsity": 0.02345652570138233, "compression_loss": 14.099385261535645, "distillation_loss": 0.32887721061706543, "epoch": 2.14, "learning_rate": 4.367897060204752e-05, "loss": 14.6774, "step": 2529, "task_loss": 0.4680793583393097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13144797895199778, "compression/movement_sparsity/importance_threshold": -0.006178095788759062, "compression/movement_sparsity/linear_layer_sparsity": 0.024623942755600734, "compression/movement_sparsity/model_sparsity": 0.023778034569837497, "compression_loss": 14.182215690612793, "distillation_loss": 0.9619067907333374, "epoch": 2.14, "learning_rate": 4.3674274443505216e-05, "loss": 14.8854, "step": 2530, "task_loss": 1.2735310792922974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13221660901451127, "compression/movement_sparsity/importance_threshold": -0.006172628447670386, "compression/movement_sparsity/linear_layer_sparsity": 0.02489713736030599, "compression/movement_sparsity/model_sparsity": 0.02404184409942622, "compression_loss": 14.265002250671387, "distillation_loss": 0.6026496291160583, "epoch": 2.14, "learning_rate": 4.36695782849629e-05, "loss": 14.8062, "step": 2531, "task_loss": 1.4905155897140503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13298478547416837, "compression/movement_sparsity/importance_threshold": -0.006167164333103357, "compression/movement_sparsity/linear_layer_sparsity": 0.025286532978622735, "compression/movement_sparsity/model_sparsity": 0.024417862780332745, "compression_loss": 14.347742080688477, "distillation_loss": 1.1518590450286865, "epoch": 2.14, "learning_rate": 4.366488212642059e-05, "loss": 15.0308, "step": 2532, "task_loss": 1.157196283340454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1337525084648542, "compression/movement_sparsity/importance_threshold": -0.006161703444105635, "compression/movement_sparsity/linear_layer_sparsity": 0.025723255618286218, "compression/movement_sparsity/model_sparsity": 0.024839582653807802, "compression_loss": 14.430432319641113, "distillation_loss": 0.9193868041038513, "epoch": 2.14, "learning_rate": 4.3660185967878275e-05, "loss": 15.0445, "step": 2533, "task_loss": 2.2844929695129395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13451977812045357, "compression/movement_sparsity/importance_threshold": -0.0061562457797248865, "compression/movement_sparsity/linear_layer_sparsity": 0.026013251375190404, "compression/movement_sparsity/model_sparsity": 0.025119616164331032, "compression_loss": 14.51305103302002, "distillation_loss": 0.5449351072311401, "epoch": 2.14, "learning_rate": 4.365548980933596e-05, "loss": 15.0743, "step": 2534, "task_loss": 1.1850903034210205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13528659457485237, "compression/movement_sparsity/importance_threshold": -0.0061507913390087664, "compression/movement_sparsity/linear_layer_sparsity": 0.02644996209068625, "compression/movement_sparsity/model_sparsity": 0.025541324523270296, "compression_loss": 14.595621109008789, "distillation_loss": 0.5265027284622192, "epoch": 2.14, "learning_rate": 4.3650793650793655e-05, "loss": 15.2332, "step": 2535, "task_loss": 0.14602598547935486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13605295796193517, "compression/movement_sparsity/importance_threshold": -0.006145340121004942, "compression/movement_sparsity/linear_layer_sparsity": 0.02686816649801124, "compression/movement_sparsity/model_sparsity": 0.02594516232265643, "compression_loss": 14.67813491821289, "distillation_loss": 0.6029934883117676, "epoch": 2.14, "learning_rate": 4.364609749225134e-05, "loss": 15.1323, "step": 2536, "task_loss": 0.9691457152366638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13681886841558732, "compression/movement_sparsity/importance_threshold": -0.006139892124761072, "compression/movement_sparsity/linear_layer_sparsity": 0.02726652909039015, "compression/movement_sparsity/model_sparsity": 0.02632983993448045, "compression_loss": 14.760595321655273, "distillation_loss": 0.35855188965797424, "epoch": 2.14, "learning_rate": 4.364140133370903e-05, "loss": 15.317, "step": 2537, "task_loss": 0.6140445470809937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13758432606969384, "compression/movement_sparsity/importance_threshold": -0.00613444734932482, "compression/movement_sparsity/linear_layer_sparsity": 0.0276374064763684, "compression/movement_sparsity/model_sparsity": 0.02668797654129805, "compression_loss": 14.842996597290039, "distillation_loss": 0.38889995217323303, "epoch": 2.15, "learning_rate": 4.3636705175166714e-05, "loss": 15.4014, "step": 2538, "task_loss": 0.08655299991369247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13834933105813996, "compression/movement_sparsity/importance_threshold": -0.006129005793743848, "compression/movement_sparsity/linear_layer_sparsity": 0.028032716481832528, "compression/movement_sparsity/model_sparsity": 0.02706970643195867, "compression_loss": 14.925350189208984, "distillation_loss": 0.42328158020973206, "epoch": 2.15, "learning_rate": 4.36320090166244e-05, "loss": 15.5366, "step": 2539, "task_loss": 0.43854227662086487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1391138835148108, "compression/movement_sparsity/importance_threshold": -0.006123567457065817, "compression/movement_sparsity/linear_layer_sparsity": 0.028365460379711285, "compression/movement_sparsity/model_sparsity": 0.027391019553305328, "compression_loss": 15.007659912109375, "distillation_loss": 0.4292697310447693, "epoch": 2.15, "learning_rate": 4.362731285808209e-05, "loss": 15.4064, "step": 2540, "task_loss": 0.1479479819536209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1398779835735915, "compression/movement_sparsity/importance_threshold": -0.006118132338338389, "compression/movement_sparsity/linear_layer_sparsity": 0.02881763674063085, "compression/movement_sparsity/model_sparsity": 0.02782766226517011, "compression_loss": 15.089914321899414, "distillation_loss": 0.9188175797462463, "epoch": 2.15, "learning_rate": 4.362261669953978e-05, "loss": 15.6024, "step": 2541, "task_loss": 1.4726858139038086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14064163136836716, "compression/movement_sparsity/importance_threshold": -0.006112700436609226, "compression/movement_sparsity/linear_layer_sparsity": 0.029284670614424694, "compression/movement_sparsity/model_sparsity": 0.02827865208863489, "compression_loss": 15.172111511230469, "distillation_loss": 0.45059359073638916, "epoch": 2.15, "learning_rate": 4.3617920540997466e-05, "loss": 15.6379, "step": 2542, "task_loss": 1.2211586236953735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14140482703302293, "compression/movement_sparsity/importance_threshold": -0.00610727175092599, "compression/movement_sparsity/linear_layer_sparsity": 0.02986028595871071, "compression/movement_sparsity/model_sparsity": 0.028834493275044757, "compression_loss": 15.254255294799805, "distillation_loss": 0.6567726731300354, "epoch": 2.15, "learning_rate": 4.361322438245515e-05, "loss": 15.7677, "step": 2543, "task_loss": 0.48129451274871826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14216757070144403, "compression/movement_sparsity/importance_threshold": -0.006101846280336342, "compression/movement_sparsity/linear_layer_sparsity": 0.030312068822098286, "compression/movement_sparsity/model_sparsity": 0.02927075600722832, "compression_loss": 15.336342811584473, "distillation_loss": 0.4167003333568573, "epoch": 2.15, "learning_rate": 4.3608528223912845e-05, "loss": 15.7596, "step": 2544, "task_loss": 0.5508861541748047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14292986250751516, "compression/movement_sparsity/importance_threshold": -0.006096424023887948, "compression/movement_sparsity/linear_layer_sparsity": 0.03063871947031512, "compression/movement_sparsity/model_sparsity": 0.02958618520078397, "compression_loss": 15.418383598327637, "distillation_loss": 0.6003167033195496, "epoch": 2.15, "learning_rate": 4.360383206537053e-05, "loss": 15.9612, "step": 2545, "task_loss": 0.7686643600463867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1436917025851222, "compression/movement_sparsity/importance_threshold": -0.006091004980628463, "compression/movement_sparsity/linear_layer_sparsity": 0.03103402947577925, "compression/movement_sparsity/model_sparsity": 0.029967915091444586, "compression_loss": 15.500371932983398, "distillation_loss": 0.411504328250885, "epoch": 2.15, "learning_rate": 4.359913590682821e-05, "loss": 15.9678, "step": 2546, "task_loss": 0.6307038068771362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14445309106814996, "compression/movement_sparsity/importance_threshold": -0.0060855891496055525, "compression/movement_sparsity/linear_layer_sparsity": 0.031333373780109953, "compression/movement_sparsity/model_sparsity": 0.030256975998030736, "compression_loss": 15.582296371459961, "distillation_loss": 0.45020604133605957, "epoch": 2.15, "learning_rate": 4.3594439748285904e-05, "loss": 15.9717, "step": 2547, "task_loss": 0.37627434730529785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14521402809048345, "compression/movement_sparsity/importance_threshold": -0.006080176529866879, "compression/movement_sparsity/linear_layer_sparsity": 0.031879798762023376, "compression/movement_sparsity/model_sparsity": 0.03078462960081557, "compression_loss": 15.664170265197754, "distillation_loss": 0.43388283252716064, "epoch": 2.15, "learning_rate": 4.358974358974359e-05, "loss": 16.1144, "step": 2548, "task_loss": 0.5741914510726929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.145974513786008, "compression/movement_sparsity/importance_threshold": -0.006074767120460102, "compression/movement_sparsity/linear_layer_sparsity": 0.032456940399766786, "compression/movement_sparsity/model_sparsity": 0.03134194464780714, "compression_loss": 15.745999336242676, "distillation_loss": 0.46094071865081787, "epoch": 2.15, "learning_rate": 4.3585047431201284e-05, "loss": 16.1849, "step": 2549, "task_loss": 0.9297397136688232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14673454828860866, "compression/movement_sparsity/importance_threshold": -0.0060693609204328855, "compression/movement_sparsity/linear_layer_sparsity": 0.032916366654608946, "compression/movement_sparsity/model_sparsity": 0.031785588197435, "compression_loss": 15.827778816223145, "distillation_loss": 0.8640276193618774, "epoch": 2.16, "learning_rate": 4.3580351272658963e-05, "loss": 16.3999, "step": 2550, "task_loss": 2.152651071548462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14749413173217052, "compression/movement_sparsity/importance_threshold": -0.006063957928832891, "compression/movement_sparsity/linear_layer_sparsity": 0.03335002478319001, "compression/movement_sparsity/model_sparsity": 0.03220434883521086, "compression_loss": 15.909488677978516, "distillation_loss": 0.43238234519958496, "epoch": 2.16, "learning_rate": 4.3575655114116657e-05, "loss": 16.3513, "step": 2551, "task_loss": 0.3273964524269104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14825326425057883, "compression/movement_sparsity/importance_threshold": -0.00605855814470778, "compression/movement_sparsity/linear_layer_sparsity": 0.03388116298219426, "compression/movement_sparsity/model_sparsity": 0.03271724080310709, "compression_loss": 15.991154670715332, "distillation_loss": 0.3342800438404083, "epoch": 2.16, "learning_rate": 4.357095895557434e-05, "loss": 16.5119, "step": 2552, "task_loss": 0.18783117830753326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1490119459777186, "compression/movement_sparsity/importance_threshold": -0.006053161567105214, "compression/movement_sparsity/linear_layer_sparsity": 0.03434824455265865, "compression/movement_sparsity/model_sparsity": 0.03316827668471505, "compression_loss": 16.072757720947266, "distillation_loss": 0.529985785484314, "epoch": 2.16, "learning_rate": 4.356626279703203e-05, "loss": 16.6363, "step": 2553, "task_loss": 1.089762806892395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14977017704747486, "compression/movement_sparsity/importance_threshold": -0.006047768195072857, "compression/movement_sparsity/linear_layer_sparsity": 0.034694713167486284, "compression/movement_sparsity/model_sparsity": 0.03350284303676122, "compression_loss": 16.15433120727539, "distillation_loss": 0.6017497777938843, "epoch": 2.16, "learning_rate": 4.3561566638489716e-05, "loss": 16.7109, "step": 2554, "task_loss": 0.8304197192192078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15052795759373316, "compression/movement_sparsity/importance_threshold": -0.006042378027658368, "compression/movement_sparsity/linear_layer_sparsity": 0.03526096804017815, "compression/movement_sparsity/model_sparsity": 0.03404964531257238, "compression_loss": 16.2358455657959, "distillation_loss": 0.3250453472137451, "epoch": 2.16, "learning_rate": 4.35568704799474e-05, "loss": 16.7634, "step": 2555, "task_loss": 0.42969292402267456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15128528775037842, "compression/movement_sparsity/importance_threshold": -0.006036991063909409, "compression/movement_sparsity/linear_layer_sparsity": 0.03589917334038481, "compression/movement_sparsity/model_sparsity": 0.03466592629736779, "compression_loss": 16.31730842590332, "distillation_loss": 0.6323897838592529, "epoch": 2.16, "learning_rate": 4.3552174321405095e-05, "loss": 16.8894, "step": 2556, "task_loss": 0.8422925472259521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15204216765129575, "compression/movement_sparsity/importance_threshold": -0.006031607302873643, "compression/movement_sparsity/linear_layer_sparsity": 0.036447494264952335, "compression/movement_sparsity/model_sparsity": 0.03519541071134396, "compression_loss": 16.398717880249023, "distillation_loss": 0.6535285115242004, "epoch": 2.16, "learning_rate": 4.354747816286278e-05, "loss": 17.0787, "step": 2557, "task_loss": 2.199420213699341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1527985974303704, "compression/movement_sparsity/importance_threshold": -0.006026226743598732, "compression/movement_sparsity/linear_layer_sparsity": 0.03701222284418681, "compression/movement_sparsity/model_sparsity": 0.035740739126573405, "compression_loss": 16.48008155822754, "distillation_loss": 0.6828805208206177, "epoch": 2.16, "learning_rate": 4.354278200432047e-05, "loss": 16.9312, "step": 2558, "task_loss": 0.6502647399902344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15355457722148735, "compression/movement_sparsity/importance_threshold": -0.0060208493851323375, "compression/movement_sparsity/linear_layer_sparsity": 0.03753425097911725, "compression/movement_sparsity/model_sparsity": 0.03624483398912261, "compression_loss": 16.561389923095703, "distillation_loss": 0.5629466772079468, "epoch": 2.16, "learning_rate": 4.3538085845778154e-05, "loss": 17.1636, "step": 2559, "task_loss": 1.0756347179412842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15431010715853188, "compression/movement_sparsity/importance_threshold": -0.006015475226522121, "compression/movement_sparsity/linear_layer_sparsity": 0.03813448972957133, "compression/movement_sparsity/model_sparsity": 0.0368244526919482, "compression_loss": 16.642658233642578, "distillation_loss": 0.6912330389022827, "epoch": 2.16, "learning_rate": 4.353338968723584e-05, "loss": 17.3077, "step": 2560, "task_loss": 0.6354376077651978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1550651873753891, "compression/movement_sparsity/importance_threshold": -0.006010104266815744, "compression/movement_sparsity/linear_layer_sparsity": 0.038589325179873686, "compression/movement_sparsity/model_sparsity": 0.03726366314529517, "compression_loss": 16.723888397216797, "distillation_loss": 0.6584317088127136, "epoch": 2.16, "learning_rate": 4.3528693528693534e-05, "loss": 17.3247, "step": 2561, "task_loss": 0.6232728362083435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15581981800594402, "compression/movement_sparsity/importance_threshold": -0.00600473650506087, "compression/movement_sparsity/linear_layer_sparsity": 0.03909758090118471, "compression/movement_sparsity/model_sparsity": 0.037754458719001675, "compression_loss": 16.80506134033203, "distillation_loss": 0.7039515376091003, "epoch": 2.17, "learning_rate": 4.352399737015122e-05, "loss": 17.4575, "step": 2562, "task_loss": 1.2590793371200562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.156573999184082, "compression/movement_sparsity/importance_threshold": -0.005999371940305159, "compression/movement_sparsity/linear_layer_sparsity": 0.03968215129536526, "compression/movement_sparsity/model_sparsity": 0.038318947321793244, "compression_loss": 16.886186599731445, "distillation_loss": 0.3606104552745819, "epoch": 2.17, "learning_rate": 4.3519301211608906e-05, "loss": 17.2752, "step": 2563, "task_loss": 0.35552719235420227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1573277310436879, "compression/movement_sparsity/importance_threshold": -0.005994010571596275, "compression/movement_sparsity/linear_layer_sparsity": 0.040164460027900646, "compression/movement_sparsity/model_sparsity": 0.03878468726561083, "compression_loss": 16.96727752685547, "distillation_loss": 0.6653269529342651, "epoch": 2.17, "learning_rate": 4.351460505306659e-05, "loss": 17.5456, "step": 2564, "task_loss": 0.6124206185340881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1580810137186469, "compression/movement_sparsity/importance_threshold": -0.005988652397981879, "compression/movement_sparsity/linear_layer_sparsity": 0.040515698309782626, "compression/movement_sparsity/model_sparsity": 0.03912385943197482, "compression_loss": 17.048316955566406, "distillation_loss": 0.507079005241394, "epoch": 2.17, "learning_rate": 4.350990889452428e-05, "loss": 17.7333, "step": 2565, "task_loss": 0.07242771238088608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15883384734284445, "compression/movement_sparsity/importance_threshold": -0.005983297418509632, "compression/movement_sparsity/linear_layer_sparsity": 0.04104074325912495, "compression/movement_sparsity/model_sparsity": 0.03963086747208004, "compression_loss": 17.12931251525879, "distillation_loss": 0.4160189926624298, "epoch": 2.17, "learning_rate": 4.350521273598197e-05, "loss": 17.6306, "step": 2566, "task_loss": 0.5723718404769897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15958623205016553, "compression/movement_sparsity/importance_threshold": -0.005977945632227196, "compression/movement_sparsity/linear_layer_sparsity": 0.041521537622370576, "compression/movement_sparsity/model_sparsity": 0.04009514506985172, "compression_loss": 17.210256576538086, "distillation_loss": 0.3851495683193207, "epoch": 2.17, "learning_rate": 4.350051657743965e-05, "loss": 17.7527, "step": 2567, "task_loss": 0.39192065596580505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1603381679744953, "compression/movement_sparsity/importance_threshold": -0.005972597038182232, "compression/movement_sparsity/linear_layer_sparsity": 0.042049647082795315, "compression/movement_sparsity/model_sparsity": 0.04060511234565614, "compression_loss": 17.291149139404297, "distillation_loss": 0.6044194102287292, "epoch": 2.17, "learning_rate": 4.3495820418897345e-05, "loss": 17.7583, "step": 2568, "task_loss": 0.4124767780303955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16108965524971874, "compression/movement_sparsity/importance_threshold": -0.005967251635422405, "compression/movement_sparsity/linear_layer_sparsity": 0.042713596660927805, "compression/movement_sparsity/model_sparsity": 0.04124625321323196, "compression_loss": 17.37198829650879, "distillation_loss": 0.41274628043174744, "epoch": 2.17, "learning_rate": 4.349112426035503e-05, "loss": 17.8658, "step": 2569, "task_loss": 1.1378264427185059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16184069400972123, "compression/movement_sparsity/importance_threshold": -0.005961909422995373, "compression/movement_sparsity/linear_layer_sparsity": 0.04316537952431538, "compression/movement_sparsity/model_sparsity": 0.04168251594541553, "compression_loss": 17.452768325805664, "distillation_loss": 0.20618055760860443, "epoch": 2.17, "learning_rate": 4.348642810181272e-05, "loss": 17.9352, "step": 2570, "task_loss": 1.4737643003463745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16259128438838777, "compression/movement_sparsity/importance_threshold": -0.005956570399948799, "compression/movement_sparsity/linear_layer_sparsity": 0.043650931630447726, "compression/movement_sparsity/model_sparsity": 0.04215138784296923, "compression_loss": 17.53348159790039, "distillation_loss": 0.48586028814315796, "epoch": 2.17, "learning_rate": 4.348173194327041e-05, "loss": 18.1308, "step": 2571, "task_loss": 0.5540573596954346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16334142651960348, "compression/movement_sparsity/importance_threshold": -0.005951234565330347, "compression/movement_sparsity/linear_layer_sparsity": 0.04397757035449692, "compression/movement_sparsity/model_sparsity": 0.04246680552198909, "compression_loss": 17.614181518554688, "distillation_loss": 0.7684342861175537, "epoch": 2.17, "learning_rate": 4.347703578472809e-05, "loss": 18.1227, "step": 2572, "task_loss": 1.214626431465149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1640911205372535, "compression/movement_sparsity/importance_threshold": -0.005945901918187678, "compression/movement_sparsity/linear_layer_sparsity": 0.04453180566621183, "compression/movement_sparsity/model_sparsity": 0.04300200114571934, "compression_loss": 17.694805145263672, "distillation_loss": 0.5023795366287231, "epoch": 2.17, "learning_rate": 4.347233962618578e-05, "loss": 18.267, "step": 2573, "task_loss": 0.9113539457321167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16484036657522272, "compression/movement_sparsity/importance_threshold": -0.005940572457568455, "compression/movement_sparsity/linear_layer_sparsity": 0.04500803307325293, "compression/movement_sparsity/model_sparsity": 0.04346186867628171, "compression_loss": 17.775392532348633, "distillation_loss": 0.5445961952209473, "epoch": 2.18, "learning_rate": 4.346764346764347e-05, "loss": 18.4147, "step": 2574, "task_loss": 1.0542140007019043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16558916476739693, "compression/movement_sparsity/importance_threshold": -0.005935246182520335, "compression/movement_sparsity/linear_layer_sparsity": 0.04545410426034293, "compression/movement_sparsity/model_sparsity": 0.04389261594581969, "compression_loss": 17.855934143066406, "distillation_loss": 0.386962354183197, "epoch": 2.18, "learning_rate": 4.346294730910116e-05, "loss": 18.33, "step": 2575, "task_loss": 0.23669065535068512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16633751524766094, "compression/movement_sparsity/importance_threshold": -0.0059299230920909825, "compression/movement_sparsity/linear_layer_sparsity": 0.04602491416507169, "compression/movement_sparsity/model_sparsity": 0.044443816774304354, "compression_loss": 17.93641471862793, "distillation_loss": 0.6054110527038574, "epoch": 2.18, "learning_rate": 4.345825115055884e-05, "loss": 18.5216, "step": 2576, "task_loss": 0.538459062576294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16708541814989974, "compression/movement_sparsity/importance_threshold": -0.0059246031853280616, "compression/movement_sparsity/linear_layer_sparsity": 0.0466112016393918, "compression/movement_sparsity/model_sparsity": 0.04500996347025034, "compression_loss": 18.01683807373047, "distillation_loss": 0.4889378547668457, "epoch": 2.18, "learning_rate": 4.345355499201653e-05, "loss": 18.6119, "step": 2577, "task_loss": 0.5102624297142029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1678328736079987, "compression/movement_sparsity/importance_threshold": -0.005919286461279231, "compression/movement_sparsity/linear_layer_sparsity": 0.047108773306501095, "compression/movement_sparsity/model_sparsity": 0.04549044201988493, "compression_loss": 18.09718894958496, "distillation_loss": 0.5456862449645996, "epoch": 2.18, "learning_rate": 4.344885883347422e-05, "loss": 18.6073, "step": 2578, "task_loss": 1.8993914127349854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16857988175584282, "compression/movement_sparsity/importance_threshold": -0.005913972918992155, "compression/movement_sparsity/linear_layer_sparsity": 0.04765844166201147, "compression/movement_sparsity/model_sparsity": 0.04602122757640588, "compression_loss": 18.17752456665039, "distillation_loss": 0.6120045185089111, "epoch": 2.18, "learning_rate": 4.344416267493191e-05, "loss": 18.8401, "step": 2579, "task_loss": 0.600737452507019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16932644272731723, "compression/movement_sparsity/importance_threshold": -0.005908662557514493, "compression/movement_sparsity/linear_layer_sparsity": 0.04809516430167495, "compression/movement_sparsity/model_sparsity": 0.046442947449880936, "compression_loss": 18.257793426513672, "distillation_loss": 0.6718224883079529, "epoch": 2.18, "learning_rate": 4.3439466516389594e-05, "loss": 19.0176, "step": 2580, "task_loss": 0.38432231545448303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17007255665630716, "compression/movement_sparsity/importance_threshold": -0.00590335537589391, "compression/movement_sparsity/linear_layer_sparsity": 0.048608189690040327, "compression/movement_sparsity/model_sparsity": 0.046938348837905264, "compression_loss": 18.338003158569336, "distillation_loss": 0.593977153301239, "epoch": 2.18, "learning_rate": 4.343477035784728e-05, "loss": 18.9205, "step": 2581, "task_loss": 0.4067271947860718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17081822367669763, "compression/movement_sparsity/importance_threshold": -0.0058980513731780645, "compression/movement_sparsity/linear_layer_sparsity": 0.04905389122793361, "compression/movement_sparsity/model_sparsity": 0.04736873915683361, "compression_loss": 18.41817855834961, "distillation_loss": 0.5458493232727051, "epoch": 2.18, "learning_rate": 4.3430074199304974e-05, "loss": 19.0176, "step": 2582, "task_loss": 0.49337348341941833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17156344392237388, "compression/movement_sparsity/importance_threshold": -0.005892750548414621, "compression/movement_sparsity/linear_layer_sparsity": 0.049560620655787244, "compression/movement_sparsity/model_sparsity": 0.04785806086995841, "compression_loss": 18.498313903808594, "distillation_loss": 0.5317829251289368, "epoch": 2.18, "learning_rate": 4.342537804076266e-05, "loss": 19.0265, "step": 2583, "task_loss": 1.0280494689941406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17230821752722114, "compression/movement_sparsity/importance_threshold": -0.005887452900651239, "compression/movement_sparsity/linear_layer_sparsity": 0.05008873011621198, "compression/movement_sparsity/model_sparsity": 0.04836802814576283, "compression_loss": 18.578393936157227, "distillation_loss": 0.8802231550216675, "epoch": 2.18, "learning_rate": 4.3420681882220346e-05, "loss": 19.236, "step": 2584, "task_loss": 0.6657559275627136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17305254462512398, "compression/movement_sparsity/importance_threshold": -0.005882158428935585, "compression/movement_sparsity/linear_layer_sparsity": 0.0505771320984093, "compression/movement_sparsity/model_sparsity": 0.04883965201737143, "compression_loss": 18.658437728881836, "distillation_loss": 0.39064058661460876, "epoch": 2.19, "learning_rate": 4.341598572367803e-05, "loss": 19.2256, "step": 2585, "task_loss": 0.6167714595794678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17379642534996842, "compression/movement_sparsity/importance_threshold": -0.005876867132315315, "compression/movement_sparsity/linear_layer_sparsity": 0.05126568123437457, "compression/movement_sparsity/model_sparsity": 0.04950454737229139, "compression_loss": 18.738422393798828, "distillation_loss": 0.6585283279418945, "epoch": 2.19, "learning_rate": 4.341128956513572e-05, "loss": 19.2675, "step": 2586, "task_loss": 0.904732346534729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17453985983563913, "compression/movement_sparsity/importance_threshold": -0.005871579009838093, "compression/movement_sparsity/linear_layer_sparsity": 0.05185026355272276, "compression/movement_sparsity/model_sparsity": 0.050069047489618754, "compression_loss": 18.81835174560547, "distillation_loss": 0.6577267646789551, "epoch": 2.19, "learning_rate": 4.340659340659341e-05, "loss": 19.379, "step": 2587, "task_loss": 0.41847237944602966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17528284821602125, "compression/movement_sparsity/importance_threshold": -0.005866294060551581, "compression/movement_sparsity/linear_layer_sparsity": 0.05241519484280704, "compression/movement_sparsity/model_sparsity": 0.050614571651956715, "compression_loss": 18.898223876953125, "distillation_loss": 0.49571147561073303, "epoch": 2.19, "learning_rate": 4.34018972480511e-05, "loss": 19.4477, "step": 2588, "task_loss": 1.4030848741531372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.176025390625, "compression/movement_sparsity/importance_threshold": -0.005861012283503442, "compression/movement_sparsity/linear_layer_sparsity": 0.05297992342204151, "compression/movement_sparsity/model_sparsity": 0.05115990006718617, "compression_loss": 18.9780330657959, "distillation_loss": 0.5149156451225281, "epoch": 2.19, "learning_rate": 4.3397201089508785e-05, "loss": 19.428, "step": 2589, "task_loss": 0.9481977224349976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17676748719646052, "compression/movement_sparsity/importance_threshold": -0.005855733677741335, "compression/movement_sparsity/linear_layer_sparsity": 0.05370341036917986, "compression/movement_sparsity/model_sparsity": 0.051858533011984134, "compression_loss": 19.057804107666016, "distillation_loss": 0.7733625173568726, "epoch": 2.19, "learning_rate": 4.339250493096647e-05, "loss": 19.6482, "step": 2590, "task_loss": 1.3754632472991943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17750913806428792, "compression/movement_sparsity/importance_threshold": -0.005850458242312926, "compression/movement_sparsity/linear_layer_sparsity": 0.054135923777667884, "compression/movement_sparsity/model_sparsity": 0.052276188254323716, "compression_loss": 19.137531280517578, "distillation_loss": 0.43490564823150635, "epoch": 2.19, "learning_rate": 4.338780877242416e-05, "loss": 19.5388, "step": 2591, "task_loss": 0.9305901527404785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17825034336236723, "compression/movement_sparsity/importance_threshold": -0.005845185976265874, "compression/movement_sparsity/linear_layer_sparsity": 0.054654851629013006, "compression/movement_sparsity/model_sparsity": 0.05277728933756634, "compression_loss": 19.217208862304688, "distillation_loss": 0.5842649340629578, "epoch": 2.19, "learning_rate": 4.338311261388185e-05, "loss": 19.6123, "step": 2592, "task_loss": 0.34575918316841125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17899110322458378, "compression/movement_sparsity/importance_threshold": -0.005839916878647841, "compression/movement_sparsity/linear_layer_sparsity": 0.05513564599225864, "compression/movement_sparsity/model_sparsity": 0.05324156693533802, "compression_loss": 19.296836853027344, "distillation_loss": 0.5343648195266724, "epoch": 2.19, "learning_rate": 4.337841645533953e-05, "loss": 19.6776, "step": 2593, "task_loss": 0.27075061202049255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17973141778482238, "compression/movement_sparsity/importance_threshold": -0.005834650948506491, "compression/movement_sparsity/linear_layer_sparsity": 0.05576447889670351, "compression/movement_sparsity/model_sparsity": 0.05384879749499892, "compression_loss": 19.376394271850586, "distillation_loss": 0.32708224654197693, "epoch": 2.19, "learning_rate": 4.3373720296797223e-05, "loss": 19.847, "step": 2594, "task_loss": 0.45126432180404663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1804712871769687, "compression/movement_sparsity/importance_threshold": -0.005829388184889482, "compression/movement_sparsity/linear_layer_sparsity": 0.056396388236398434, "compression/movement_sparsity/model_sparsity": 0.05445899880489481, "compression_loss": 19.455890655517578, "distillation_loss": 0.7827243208885193, "epoch": 2.19, "learning_rate": 4.336902413825491e-05, "loss": 20.094, "step": 2595, "task_loss": 1.217795729637146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18121071153490753, "compression/movement_sparsity/importance_threshold": -0.005824128586844479, "compression/movement_sparsity/linear_layer_sparsity": 0.05693669612031466, "compression/movement_sparsity/model_sparsity": 0.05498074545081705, "compression_loss": 19.535369873046875, "distillation_loss": 0.49606624245643616, "epoch": 2.19, "learning_rate": 4.3364327979712596e-05, "loss": 20.0713, "step": 2596, "task_loss": 1.4898377656936646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18194969099252412, "compression/movement_sparsity/importance_threshold": -0.005818872153419141, "compression/movement_sparsity/linear_layer_sparsity": 0.05745259523308027, "compression/movement_sparsity/model_sparsity": 0.05547892184196786, "compression_loss": 19.61480140686035, "distillation_loss": 0.6626887321472168, "epoch": 2.2, "learning_rate": 4.335963182117028e-05, "loss": 20.2137, "step": 2597, "task_loss": 0.4031434655189514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18268822568370358, "compression/movement_sparsity/importance_threshold": -0.005813618883661133, "compression/movement_sparsity/linear_layer_sparsity": 0.057974778382189986, "compression/movement_sparsity/model_sparsity": 0.055983166393482386, "compression_loss": 19.69417953491211, "distillation_loss": 0.5124004483222961, "epoch": 2.2, "learning_rate": 4.335493566262797e-05, "loss": 20.1755, "step": 2598, "task_loss": 0.5819061398506165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18342631574233104, "compression/movement_sparsity/importance_threshold": -0.0058083687766181166, "compression/movement_sparsity/linear_layer_sparsity": 0.058554781820166, "compression/movement_sparsity/model_sparsity": 0.056543244929064644, "compression_loss": 19.77351951599121, "distillation_loss": 0.6369973421096802, "epoch": 2.2, "learning_rate": 4.335023950408566e-05, "loss": 20.39, "step": 2599, "task_loss": 1.0740019083023071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18416396130229173, "compression/movement_sparsity/importance_threshold": -0.00580312183133775, "compression/movement_sparsity/linear_layer_sparsity": 0.059084620284897936, "compression/movement_sparsity/model_sparsity": 0.05705488181255927, "compression_loss": 19.852800369262695, "distillation_loss": 0.33787956833839417, "epoch": 2.2, "learning_rate": 4.334554334554335e-05, "loss": 20.2958, "step": 2600, "task_loss": 0.4420090913772583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18490116249747057, "compression/movement_sparsity/importance_threshold": -0.0057978780468677, "compression/movement_sparsity/linear_layer_sparsity": 0.05969513766785412, "compression/movement_sparsity/model_sparsity": 0.05764442604523976, "compression_loss": 19.932022094726562, "distillation_loss": 0.43002378940582275, "epoch": 2.2, "learning_rate": 4.334084718700104e-05, "loss": 20.4813, "step": 2601, "task_loss": 1.3343119621276855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1856379194617529, "compression/movement_sparsity/importance_threshold": -0.005792637422255624, "compression/movement_sparsity/linear_layer_sparsity": 0.06016069294486112, "compression/movement_sparsity/model_sparsity": 0.05809398806626601, "compression_loss": 20.011186599731445, "distillation_loss": 0.7209482192993164, "epoch": 2.2, "learning_rate": 4.333615102845872e-05, "loss": 20.5879, "step": 2602, "task_loss": 0.3718501627445221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18637423232902384, "compression/movement_sparsity/importance_threshold": -0.0057873999565491874, "compression/movement_sparsity/linear_layer_sparsity": 0.06066741044854712, "compression/movement_sparsity/model_sparsity": 0.058583298264855026, "compression_loss": 20.090314865112305, "distillation_loss": 0.5073649883270264, "epoch": 2.2, "learning_rate": 4.333145486991641e-05, "loss": 20.599, "step": 2603, "task_loss": 0.15572495758533478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1871101012331684, "compression/movement_sparsity/importance_threshold": -0.0057821656487960504, "compression/movement_sparsity/linear_layer_sparsity": 0.06109478454078409, "compression/movement_sparsity/model_sparsity": 0.05899599074226716, "compression_loss": 20.169374465942383, "distillation_loss": 0.565415620803833, "epoch": 2.2, "learning_rate": 4.33267587113741e-05, "loss": 20.6655, "step": 2604, "task_loss": 0.36790555715560913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1878455263080716, "compression/movement_sparsity/importance_threshold": -0.005776934498043877, "compression/movement_sparsity/linear_layer_sparsity": 0.06169461786953854, "compression/movement_sparsity/model_sparsity": 0.059575217950875746, "compression_loss": 20.248397827148438, "distillation_loss": 0.5768604278564453, "epoch": 2.2, "learning_rate": 4.332206255283179e-05, "loss": 20.7656, "step": 2605, "task_loss": 0.601012647151947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18858050768761903, "compression/movement_sparsity/importance_threshold": -0.005771706503340325, "compression/movement_sparsity/linear_layer_sparsity": 0.0622351046159693, "compression/movement_sparsity/model_sparsity": 0.060097137314834896, "compression_loss": 20.327377319335938, "distillation_loss": 0.5254782438278198, "epoch": 2.2, "learning_rate": 4.331736639428947e-05, "loss": 20.8424, "step": 2606, "task_loss": 0.8878291249275208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18931504550569556, "compression/movement_sparsity/importance_threshold": -0.0057664816637330585, "compression/movement_sparsity/linear_layer_sparsity": 0.06267162454478298, "compression/movement_sparsity/model_sparsity": 0.060518661441201445, "compression_loss": 20.406280517578125, "distillation_loss": 0.24258063733577728, "epoch": 2.2, "learning_rate": 4.331267023574716e-05, "loss": 20.8265, "step": 2607, "task_loss": 0.09308204799890518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19004913989618644, "compression/movement_sparsity/importance_threshold": -0.005761259978269739, "compression/movement_sparsity/linear_layer_sparsity": 0.0633233518510887, "compression/movement_sparsity/model_sparsity": 0.061147999909587865, "compression_loss": 20.485149383544922, "distillation_loss": 0.5478302240371704, "epoch": 2.2, "learning_rate": 4.330797407720485e-05, "loss": 20.9955, "step": 2608, "task_loss": 0.43576210737228394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19078279099297668, "compression/movement_sparsity/importance_threshold": -0.005756041445998028, "compression/movement_sparsity/linear_layer_sparsity": 0.0638636835833402, "compression/movement_sparsity/model_sparsity": 0.06166976958458169, "compression_loss": 20.56397247314453, "distillation_loss": 0.5506159663200378, "epoch": 2.21, "learning_rate": 4.330327791866254e-05, "loss": 21.084, "step": 2609, "task_loss": 0.7583205103874207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19151599892995141, "compression/movement_sparsity/importance_threshold": -0.0057508260659655894, "compression/movement_sparsity/linear_layer_sparsity": 0.06455052756333354, "compression/movement_sparsity/model_sparsity": 0.06233301836088303, "compression_loss": 20.642723083496094, "distillation_loss": 0.24049726128578186, "epoch": 2.21, "learning_rate": 4.329858176012022e-05, "loss": 21.0301, "step": 2610, "task_loss": 0.3671181797981262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19224876384099587, "compression/movement_sparsity/importance_threshold": -0.005745613837220083, "compression/movement_sparsity/linear_layer_sparsity": 0.06510305771907653, "compression/movement_sparsity/model_sparsity": 0.06286656740599467, "compression_loss": 20.72142219543457, "distillation_loss": 0.5489833354949951, "epoch": 2.21, "learning_rate": 4.329388560157791e-05, "loss": 21.4358, "step": 2611, "task_loss": 1.1624144315719604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19298108585999518, "compression/movement_sparsity/importance_threshold": -0.005740404758809171, "compression/movement_sparsity/linear_layer_sparsity": 0.06583874308970637, "compression/movement_sparsity/model_sparsity": 0.06357697972091045, "compression_loss": 20.800065994262695, "distillation_loss": 0.43942347168922424, "epoch": 2.21, "learning_rate": 4.32891894430356e-05, "loss": 21.2593, "step": 2612, "task_loss": 0.6376338601112366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19371296512083447, "compression/movement_sparsity/importance_threshold": -0.0057351988297805155, "compression/movement_sparsity/linear_layer_sparsity": 0.06647215487452342, "compression/movement_sparsity/model_sparsity": 0.06418863186231646, "compression_loss": 20.87865447998047, "distillation_loss": 0.36251717805862427, "epoch": 2.21, "learning_rate": 4.328449328449329e-05, "loss": 21.4119, "step": 2613, "task_loss": 0.05852902680635452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19444440175739863, "compression/movement_sparsity/importance_threshold": -0.0057299960491817795, "compression/movement_sparsity/linear_layer_sparsity": 0.0670035077085451, "compression/movement_sparsity/model_sparsity": 0.06470173109185699, "compression_loss": 20.957197189331055, "distillation_loss": 0.3656657636165619, "epoch": 2.21, "learning_rate": 4.327979712595097e-05, "loss": 21.48, "step": 2614, "task_loss": 0.30097368359565735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19517539590357325, "compression/movement_sparsity/importance_threshold": -0.0057247964160606225, "compression/movement_sparsity/linear_layer_sparsity": 0.0675865637334359, "compression/movement_sparsity/model_sparsity": 0.06526475734860265, "compression_loss": 21.03570556640625, "distillation_loss": 0.3687317371368408, "epoch": 2.21, "learning_rate": 4.3275100967408664e-05, "loss": 21.5355, "step": 2615, "task_loss": 0.5483551025390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19590594769324332, "compression/movement_sparsity/importance_threshold": -0.005719599929464707, "compression/movement_sparsity/linear_layer_sparsity": 0.06825509219194056, "compression/movement_sparsity/model_sparsity": 0.06591031979792358, "compression_loss": 21.114147186279297, "distillation_loss": 0.45096608996391296, "epoch": 2.21, "learning_rate": 4.327040480886635e-05, "loss": 21.5438, "step": 2616, "task_loss": 0.865093469619751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19663605726029387, "compression/movement_sparsity/importance_threshold": -0.005714406588441696, "compression/movement_sparsity/linear_layer_sparsity": 0.0689726528277639, "compression/movement_sparsity/model_sparsity": 0.06660323001843166, "compression_loss": 21.192527770996094, "distillation_loss": 0.4533529281616211, "epoch": 2.21, "learning_rate": 4.3265708650324036e-05, "loss": 21.664, "step": 2617, "task_loss": 1.151792287826538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19736572473861014, "compression/movement_sparsity/importance_threshold": -0.005709216392039251, "compression/movement_sparsity/linear_layer_sparsity": 0.06972357728463237, "compression/movement_sparsity/model_sparsity": 0.06732835791009287, "compression_loss": 21.270870208740234, "distillation_loss": 0.693733274936676, "epoch": 2.21, "learning_rate": 4.326101249178173e-05, "loss": 21.9074, "step": 2618, "task_loss": 0.8498420715332031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19809495026207724, "compression/movement_sparsity/importance_threshold": -0.005704029339305033, "compression/movement_sparsity/linear_layer_sparsity": 0.07037702167107765, "compression/movement_sparsity/model_sparsity": 0.0679593544716337, "compression_loss": 21.349172592163086, "distillation_loss": 0.8976700305938721, "epoch": 2.21, "learning_rate": 4.325631633323941e-05, "loss": 22.1591, "step": 2619, "task_loss": 0.6339174509048462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1988237339645803, "compression/movement_sparsity/importance_threshold": -0.005698845429286704, "compression/movement_sparsity/linear_layer_sparsity": 0.07099365615203104, "compression/movement_sparsity/model_sparsity": 0.06855480566117679, "compression_loss": 21.427438735961914, "distillation_loss": 1.20167875289917, "epoch": 2.21, "learning_rate": 4.32516201746971e-05, "loss": 22.1606, "step": 2620, "task_loss": 1.4469218254089355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19955207598000446, "compression/movement_sparsity/importance_threshold": -0.0056936646610319264, "compression/movement_sparsity/linear_layer_sparsity": 0.07163163489305262, "compression/movement_sparsity/model_sparsity": 0.06917086786979211, "compression_loss": 21.505645751953125, "distillation_loss": 0.8758472204208374, "epoch": 2.22, "learning_rate": 4.324692401615479e-05, "loss": 22.1991, "step": 2621, "task_loss": 0.4129016101360321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20027997644223483, "compression/movement_sparsity/importance_threshold": -0.005688487033588362, "compression/movement_sparsity/linear_layer_sparsity": 0.0722818478300686, "compression/movement_sparsity/model_sparsity": 0.06979874399213262, "compression_loss": 21.583820343017578, "distillation_loss": 0.3956608176231384, "epoch": 2.22, "learning_rate": 4.3242227857612475e-05, "loss": 22.054, "step": 2622, "task_loss": 0.7425188422203064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20100743548515654, "compression/movement_sparsity/importance_threshold": -0.005683312546003674, "compression/movement_sparsity/linear_layer_sparsity": 0.07285423172492529, "compression/movement_sparsity/model_sparsity": 0.07035146473934217, "compression_loss": 21.661941528320312, "distillation_loss": 0.40547487139701843, "epoch": 2.22, "learning_rate": 4.323753169907016e-05, "loss": 22.2319, "step": 2623, "task_loss": 1.0078785419464111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20173445324265482, "compression/movement_sparsity/importance_threshold": -0.005678141197325522, "compression/movement_sparsity/linear_layer_sparsity": 0.07334895351596961, "compression/movement_sparsity/model_sparsity": 0.07082919131492188, "compression_loss": 21.740007400512695, "distillation_loss": 0.3886815011501312, "epoch": 2.22, "learning_rate": 4.323283554052785e-05, "loss": 22.289, "step": 2624, "task_loss": 0.4149186313152313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20246102984861447, "compression/movement_sparsity/importance_threshold": -0.00567297298660157, "compression/movement_sparsity/linear_layer_sparsity": 0.07396864058383777, "compression/movement_sparsity/model_sparsity": 0.07142759022562836, "compression_loss": 21.81803321838379, "distillation_loss": 0.7885529398918152, "epoch": 2.22, "learning_rate": 4.322813938198554e-05, "loss": 22.5255, "step": 2625, "task_loss": 0.9068634510040283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20318716543692117, "compression/movement_sparsity/importance_threshold": -0.005667807912879476, "compression/movement_sparsity/linear_layer_sparsity": 0.07448013967874575, "compression/movement_sparsity/model_sparsity": 0.07192151775307098, "compression_loss": 21.896011352539062, "distillation_loss": 0.7361727356910706, "epoch": 2.22, "learning_rate": 4.322344322344323e-05, "loss": 22.6279, "step": 2626, "task_loss": 0.6048960089683533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20391286014145993, "compression/movement_sparsity/importance_threshold": -0.005662645975206906, "compression/movement_sparsity/linear_layer_sparsity": 0.07501130172608528, "compression/movement_sparsity/model_sparsity": 0.0724344327500388, "compression_loss": 21.973955154418945, "distillation_loss": 0.3749714195728302, "epoch": 2.22, "learning_rate": 4.321874706490091e-05, "loss": 22.5326, "step": 2627, "task_loss": 0.5550377368927002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20463811409611565, "compression/movement_sparsity/importance_threshold": -0.005657487172631519, "compression/movement_sparsity/linear_layer_sparsity": 0.07549975140495313, "compression/movement_sparsity/model_sparsity": 0.07290610267979059, "compression_loss": 22.051856994628906, "distillation_loss": 0.6773885488510132, "epoch": 2.22, "learning_rate": 4.32140509063586e-05, "loss": 22.6969, "step": 2628, "task_loss": 1.3753666877746582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20536292743477358, "compression/movement_sparsity/importance_threshold": -0.005652331504200979, "compression/movement_sparsity/linear_layer_sparsity": 0.07608909146618802, "compression/movement_sparsity/model_sparsity": 0.07347519709689997, "compression_loss": 22.12969207763672, "distillation_loss": 0.5389634370803833, "epoch": 2.22, "learning_rate": 4.3209354747816286e-05, "loss": 22.5895, "step": 2629, "task_loss": 0.8067761063575745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20608730029131894, "compression/movement_sparsity/importance_threshold": -0.005647178968962946, "compression/movement_sparsity/linear_layer_sparsity": 0.07680338488007912, "compression/movement_sparsity/model_sparsity": 0.07416495233460034, "compression_loss": 22.207473754882812, "distillation_loss": 0.4898052215576172, "epoch": 2.22, "learning_rate": 4.320465858927398e-05, "loss": 22.6981, "step": 2630, "task_loss": 0.2666015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20681123279963676, "compression/movement_sparsity/importance_threshold": -0.005642029565965084, "compression/movement_sparsity/linear_layer_sparsity": 0.07735744132927949, "compression/movement_sparsity/model_sparsity": 0.07469997524029368, "compression_loss": 22.285205841064453, "distillation_loss": 0.6534202694892883, "epoch": 2.22, "learning_rate": 4.319996243073166e-05, "loss": 22.8388, "step": 2631, "task_loss": 1.0028841495513916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20753472509361226, "compression/movement_sparsity/importance_threshold": -0.005636883294255053, "compression/movement_sparsity/linear_layer_sparsity": 0.07794357378942035, "compression/movement_sparsity/model_sparsity": 0.07526597224727434, "compression_loss": 22.362873077392578, "distillation_loss": 0.7652188539505005, "epoch": 2.22, "learning_rate": 4.319526627218935e-05, "loss": 23.0477, "step": 2632, "task_loss": 1.4442565441131592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20825777730713058, "compression/movement_sparsity/importance_threshold": -0.005631740152880515, "compression/movement_sparsity/linear_layer_sparsity": 0.07865483846473194, "compression/movement_sparsity/model_sparsity": 0.0759528027928829, "compression_loss": 22.440507888793945, "distillation_loss": 0.5427497625350952, "epoch": 2.23, "learning_rate": 4.319057011364704e-05, "loss": 23.0196, "step": 2633, "task_loss": 0.4118804633617401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2089803895740765, "compression/movement_sparsity/importance_threshold": -0.005626600140889134, "compression/movement_sparsity/linear_layer_sparsity": 0.07935999796621397, "compression/movement_sparsity/model_sparsity": 0.07663373789616465, "compression_loss": 22.51807403564453, "distillation_loss": 0.47500932216644287, "epoch": 2.23, "learning_rate": 4.3185873955104724e-05, "loss": 23.1264, "step": 2634, "task_loss": 0.2947646975517273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2097025620283357, "compression/movement_sparsity/importance_threshold": -0.00562146325732857, "compression/movement_sparsity/linear_layer_sparsity": 0.08005906421803408, "compression/movement_sparsity/model_sparsity": 0.0773087890716554, "compression_loss": 22.595600128173828, "distillation_loss": 0.42827779054641724, "epoch": 2.23, "learning_rate": 4.318117779656242e-05, "loss": 23.1725, "step": 2635, "task_loss": 1.0547657012939453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2104242948037932, "compression/movement_sparsity/importance_threshold": -0.005616329501246484, "compression/movement_sparsity/linear_layer_sparsity": 0.08054443746165188, "compression/movement_sparsity/model_sparsity": 0.07777748825117219, "compression_loss": 22.67308235168457, "distillation_loss": 0.3451380133628845, "epoch": 2.23, "learning_rate": 4.31764816380201e-05, "loss": 23.3693, "step": 2636, "task_loss": 1.1388704776763916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21114558803433403, "compression/movement_sparsity/importance_threshold": -0.005611198871690539, "compression/movement_sparsity/linear_layer_sparsity": 0.08119482926118239, "compression/movement_sparsity/model_sparsity": 0.07840553709154963, "compression_loss": 22.750507354736328, "distillation_loss": 0.7767535448074341, "epoch": 2.23, "learning_rate": 4.317178547947779e-05, "loss": 23.321, "step": 2637, "task_loss": 0.6183381676673889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2118664418538433, "compression/movement_sparsity/importance_threshold": -0.005606071367708397, "compression/movement_sparsity/linear_layer_sparsity": 0.08182062150288012, "compression/movement_sparsity/model_sparsity": 0.07900983144458291, "compression_loss": 22.827890396118164, "distillation_loss": 0.239285409450531, "epoch": 2.23, "learning_rate": 4.3167089320935477e-05, "loss": 23.3305, "step": 2638, "task_loss": 0.020945247262716293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21258685639620623, "compression/movement_sparsity/importance_threshold": -0.0056009469883477195, "compression/movement_sparsity/linear_layer_sparsity": 0.08243727983216877, "compression/movement_sparsity/model_sparsity": 0.0796053056631976, "compression_loss": 22.90520668029785, "distillation_loss": 0.7843768000602722, "epoch": 2.23, "learning_rate": 4.316239316239317e-05, "loss": 23.6227, "step": 2639, "task_loss": 0.3737318515777588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21330683179530796, "compression/movement_sparsity/importance_threshold": -0.005595825732656168, "compression/movement_sparsity/linear_layer_sparsity": 0.08298847448113654, "compression/movement_sparsity/model_sparsity": 0.08013756508030025, "compression_loss": 22.982471466064453, "distillation_loss": 0.37306562066078186, "epoch": 2.23, "learning_rate": 4.315769700385085e-05, "loss": 23.4888, "step": 2640, "task_loss": 0.6363945603370667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21402636818503362, "compression/movement_sparsity/importance_threshold": -0.005590707599681405, "compression/movement_sparsity/linear_layer_sparsity": 0.0836737921676725, "compression/movement_sparsity/model_sparsity": 0.08079933999601989, "compression_loss": 23.059688568115234, "distillation_loss": 0.6421718597412109, "epoch": 2.23, "learning_rate": 4.3153000845308536e-05, "loss": 23.7357, "step": 2641, "task_loss": 1.79175865650177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21474546569926822, "compression/movement_sparsity/importance_threshold": -0.0055855925884710925, "compression/movement_sparsity/linear_layer_sparsity": 0.08441251820104947, "compression/movement_sparsity/model_sparsity": 0.08151268851756328, "compression_loss": 23.136859893798828, "distillation_loss": 0.27887701988220215, "epoch": 2.23, "learning_rate": 4.314830468676623e-05, "loss": 23.6534, "step": 2642, "task_loss": 0.6620002388954163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2154641244718971, "compression/movement_sparsity/importance_threshold": -0.005580480698072892, "compression/movement_sparsity/linear_layer_sparsity": 0.08514076289107454, "compression/movement_sparsity/model_sparsity": 0.08221591576214327, "compression_loss": 23.213953018188477, "distillation_loss": 0.21152347326278687, "epoch": 2.23, "learning_rate": 4.3143608528223915e-05, "loss": 23.6475, "step": 2643, "task_loss": 0.8504502177238464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2161823446368053, "compression/movement_sparsity/importance_threshold": -0.005575371927534465, "compression/movement_sparsity/linear_layer_sparsity": 0.08578793516534337, "compression/movement_sparsity/model_sparsity": 0.08284085567785618, "compression_loss": 23.291006088256836, "distillation_loss": 0.5194219946861267, "epoch": 2.23, "learning_rate": 4.31389123696816e-05, "loss": 24.024, "step": 2644, "task_loss": 0.6820626258850098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2169001263278777, "compression/movement_sparsity/importance_threshold": -0.005570266275903475, "compression/movement_sparsity/linear_layer_sparsity": 0.08643967439581673, "compression/movement_sparsity/model_sparsity": 0.0834702056607784, "compression_loss": 23.36798858642578, "distillation_loss": 0.589015007019043, "epoch": 2.24, "learning_rate": 4.313421621113929e-05, "loss": 23.9192, "step": 2645, "task_loss": 0.8061721324920654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21761746967899998, "compression/movement_sparsity/importance_threshold": -0.005565163742227582, "compression/movement_sparsity/linear_layer_sparsity": 0.08705938531202018, "compression/movement_sparsity/model_sparsity": 0.08406862760055647, "compression_loss": 23.44496726989746, "distillation_loss": 0.38009291887283325, "epoch": 2.24, "learning_rate": 4.312952005259698e-05, "loss": 23.9653, "step": 2646, "task_loss": 0.3457407057285309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21833437482405704, "compression/movement_sparsity/importance_threshold": -0.005560064325554448, "compression/movement_sparsity/linear_layer_sparsity": 0.08787288780064165, "compression/movement_sparsity/model_sparsity": 0.08485418377606743, "compression_loss": 23.5218563079834, "distillation_loss": 0.39718660712242126, "epoch": 2.24, "learning_rate": 4.312482389405467e-05, "loss": 24.0245, "step": 2647, "task_loss": 0.9040762186050415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2190508418969339, "compression/movement_sparsity/importance_threshold": -0.005554968024931735, "compression/movement_sparsity/linear_layer_sparsity": 0.08850960257989383, "compression/movement_sparsity/model_sparsity": 0.08546902544388853, "compression_loss": 23.59872055053711, "distillation_loss": 0.42603129148483276, "epoch": 2.24, "learning_rate": 4.3120127735512354e-05, "loss": 23.9994, "step": 2648, "task_loss": 0.4630969762802124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21976687103151593, "compression/movement_sparsity/importance_threshold": -0.005549874839407105, "compression/movement_sparsity/linear_layer_sparsity": 0.08916763777087894, "compression/movement_sparsity/model_sparsity": 0.08610445510171026, "compression_loss": 23.67555809020996, "distillation_loss": 0.33354848623275757, "epoch": 2.24, "learning_rate": 4.311543157697004e-05, "loss": 24.199, "step": 2649, "task_loss": 0.7250646948814392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.220482462361688, "compression/movement_sparsity/importance_threshold": -0.0055447847680282205, "compression/movement_sparsity/linear_layer_sparsity": 0.08972322051353669, "compression/movement_sparsity/model_sparsity": 0.08664095186798529, "compression_loss": 23.752336502075195, "distillation_loss": 0.9378625154495239, "epoch": 2.24, "learning_rate": 4.3110735418427726e-05, "loss": 24.4507, "step": 2650, "task_loss": 0.4904879927635193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22119761602133547, "compression/movement_sparsity/importance_threshold": -0.005539697809842743, "compression/movement_sparsity/linear_layer_sparsity": 0.090449950834272, "compression/movement_sparsity/model_sparsity": 0.08734271676651938, "compression_loss": 23.829078674316406, "distillation_loss": 0.35977494716644287, "epoch": 2.24, "learning_rate": 4.310603925988542e-05, "loss": 24.4914, "step": 2651, "task_loss": 0.66468346118927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22191233214434336, "compression/movement_sparsity/importance_threshold": -0.005534613963898333, "compression/movement_sparsity/linear_layer_sparsity": 0.0911719234121206, "compression/movement_sparsity/model_sparsity": 0.08803988736527144, "compression_loss": 23.905773162841797, "distillation_loss": 0.7093098759651184, "epoch": 2.24, "learning_rate": 4.3101343101343106e-05, "loss": 24.6068, "step": 2652, "task_loss": 0.8005185127258301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2226266108645969, "compression/movement_sparsity/importance_threshold": -0.005529533229242654, "compression/movement_sparsity/linear_layer_sparsity": 0.09185590559188134, "compression/movement_sparsity/model_sparsity": 0.0887003726529821, "compression_loss": 23.9824161529541, "distillation_loss": 0.4469544589519501, "epoch": 2.24, "learning_rate": 4.309664694280079e-05, "loss": 24.5345, "step": 2653, "task_loss": 0.21579952538013458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2233404523159811, "compression/movement_sparsity/importance_threshold": -0.0055244556049233685, "compression/movement_sparsity/linear_layer_sparsity": 0.09246643489900516, "compression/movement_sparsity/model_sparsity": 0.08928992840019838, "compression_loss": 24.059019088745117, "distillation_loss": 0.42701083421707153, "epoch": 2.24, "learning_rate": 4.309195078425848e-05, "loss": 24.5871, "step": 2654, "task_loss": 1.2210520505905151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2240538566323813, "compression/movement_sparsity/importance_threshold": -0.005519381089988135, "compression/movement_sparsity/linear_layer_sparsity": 0.09308611004270569, "compression/movement_sparsity/model_sparsity": 0.08988831579636908, "compression_loss": 24.135576248168945, "distillation_loss": 0.24332909286022186, "epoch": 2.24, "learning_rate": 4.3087254625716165e-05, "loss": 24.6078, "step": 2655, "task_loss": 1.9302306175231934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2247668239476821, "compression/movement_sparsity/importance_threshold": -0.0055143096834846216, "compression/movement_sparsity/linear_layer_sparsity": 0.09382520572527939, "compression/movement_sparsity/model_sparsity": 0.0906020212685221, "compression_loss": 24.21204376220703, "distillation_loss": 0.6544049978256226, "epoch": 2.24, "learning_rate": 4.308255846717386e-05, "loss": 24.882, "step": 2656, "task_loss": 1.1821209192276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22547935439576938, "compression/movement_sparsity/importance_threshold": -0.005509241384460484, "compression/movement_sparsity/linear_layer_sparsity": 0.09439642105170777, "compression/movement_sparsity/model_sparsity": 0.09115361359122377, "compression_loss": 24.288490295410156, "distillation_loss": 0.6180415153503418, "epoch": 2.25, "learning_rate": 4.307786230863154e-05, "loss": 24.8414, "step": 2657, "task_loss": 1.1009920835494995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22619144811052805, "compression/movement_sparsity/importance_threshold": -0.005504176191963386, "compression/movement_sparsity/linear_layer_sparsity": 0.095046789002903, "compression/movement_sparsity/model_sparsity": 0.09178163940252962, "compression_loss": 24.364871978759766, "distillation_loss": 0.5265064239501953, "epoch": 2.25, "learning_rate": 4.307316615008923e-05, "loss": 24.9015, "step": 2658, "task_loss": 1.0552539825439453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22690310522584312, "compression/movement_sparsity/importance_threshold": -0.005499114105040988, "compression/movement_sparsity/linear_layer_sparsity": 0.09583765942135163, "compression/movement_sparsity/model_sparsity": 0.09254534098910254, "compression_loss": 24.44117546081543, "distillation_loss": 0.4358474612236023, "epoch": 2.25, "learning_rate": 4.306846999154692e-05, "loss": 25.0817, "step": 2659, "task_loss": 1.0178918838500977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2276143258755997, "compression/movement_sparsity/importance_threshold": -0.005494055122740956, "compression/movement_sparsity/linear_layer_sparsity": 0.0965397424875836, "compression/movement_sparsity/model_sparsity": 0.0932233053421493, "compression_loss": 24.517452239990234, "distillation_loss": 0.492343008518219, "epoch": 2.25, "learning_rate": 4.30637738330046e-05, "loss": 25.1225, "step": 2660, "task_loss": 0.3752025067806244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22832511019368307, "compression/movement_sparsity/importance_threshold": -0.005488999244110948, "compression/movement_sparsity/linear_layer_sparsity": 0.097193019935682, "compression/movement_sparsity/model_sparsity": 0.09385414070018902, "compression_loss": 24.59368896484375, "distillation_loss": 0.5317042469978333, "epoch": 2.25, "learning_rate": 4.305907767446229e-05, "loss": 25.1379, "step": 2661, "task_loss": 0.23216867446899414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2290354583139782, "compression/movement_sparsity/importance_threshold": -0.005483946468198627, "compression/movement_sparsity/linear_layer_sparsity": 0.0978111926342604, "compression/movement_sparsity/model_sparsity": 0.09445107726484961, "compression_loss": 24.669862747192383, "distillation_loss": 0.5643749833106995, "epoch": 2.25, "learning_rate": 4.3054381515919976e-05, "loss": 25.0975, "step": 2662, "task_loss": 0.8683118224143982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22974537037037046, "compression/movement_sparsity/importance_threshold": -0.005478896794051656, "compression/movement_sparsity/linear_layer_sparsity": 0.09850738516168027, "compression/movement_sparsity/model_sparsity": 0.09512335343721387, "compression_loss": 24.745981216430664, "distillation_loss": 0.5163435935974121, "epoch": 2.25, "learning_rate": 4.304968535737767e-05, "loss": 25.3077, "step": 2663, "task_loss": 1.0334433317184448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23045484649674486, "compression/movement_sparsity/importance_threshold": -0.005473850220717695, "compression/movement_sparsity/linear_layer_sparsity": 0.09924917570613967, "compression/movement_sparsity/model_sparsity": 0.09583966119445646, "compression_loss": 24.82204246520996, "distillation_loss": 0.31329426169395447, "epoch": 2.25, "learning_rate": 4.3044989198835355e-05, "loss": 25.3566, "step": 2664, "task_loss": 0.8825055360794067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2311638868269862, "compression/movement_sparsity/importance_threshold": -0.005468806747244409, "compression/movement_sparsity/linear_layer_sparsity": 0.10002453278249403, "compression/movement_sparsity/model_sparsity": 0.09658838236996067, "compression_loss": 24.898061752319336, "distillation_loss": 0.3222837448120117, "epoch": 2.25, "learning_rate": 4.304029304029304e-05, "loss": 25.3699, "step": 2665, "task_loss": 0.1766992211341858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23187249149498024, "compression/movement_sparsity/importance_threshold": -0.005463766372679456, "compression/movement_sparsity/linear_layer_sparsity": 0.10073270909838793, "compression/movement_sparsity/model_sparsity": 0.09727223065079846, "compression_loss": 24.974018096923828, "distillation_loss": 0.9242329001426697, "epoch": 2.25, "learning_rate": 4.303559688175073e-05, "loss": 25.521, "step": 2666, "task_loss": 1.0188361406326294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2325806606346119, "compression/movement_sparsity/importance_threshold": -0.005458729096070499, "compression/movement_sparsity/linear_layer_sparsity": 0.10148992951576814, "compression/movement_sparsity/model_sparsity": 0.09800343821735918, "compression_loss": 25.049959182739258, "distillation_loss": 0.7898390293121338, "epoch": 2.25, "learning_rate": 4.3030900723208414e-05, "loss": 25.7099, "step": 2667, "task_loss": 1.0402696132659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23328839437976623, "compression/movement_sparsity/importance_threshold": -0.0054536949164652, "compression/movement_sparsity/linear_layer_sparsity": 0.10227445627703448, "compression/movement_sparsity/model_sparsity": 0.0987610140708894, "compression_loss": 25.12582778930664, "distillation_loss": 0.5720397233963013, "epoch": 2.26, "learning_rate": 4.302620456466611e-05, "loss": 25.6962, "step": 2668, "task_loss": 0.5320547223091125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23399569286432842, "compression/movement_sparsity/importance_threshold": -0.005448663832911222, "compression/movement_sparsity/linear_layer_sparsity": 0.10286669391100488, "compression/movement_sparsity/model_sparsity": 0.09933290652019686, "compression_loss": 25.20165252685547, "distillation_loss": 0.596847653388977, "epoch": 2.26, "learning_rate": 4.3021508406123794e-05, "loss": 25.7029, "step": 2669, "task_loss": 0.6297202110290527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2347025562221836, "compression/movement_sparsity/importance_threshold": -0.005443635844456225, "compression/movement_sparsity/linear_layer_sparsity": 0.10360541994438187, "compression/movement_sparsity/model_sparsity": 0.10004625504174025, "compression_loss": 25.277441024780273, "distillation_loss": 0.27569687366485596, "epoch": 2.26, "learning_rate": 4.301681224758148e-05, "loss": 25.627, "step": 2670, "task_loss": 0.11818414181470871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23540898458721693, "compression/movement_sparsity/importance_threshold": -0.005438610950147873, "compression/movement_sparsity/linear_layer_sparsity": 0.10445098651977619, "compression/movement_sparsity/model_sparsity": 0.10086277380400273, "compression_loss": 25.3531494140625, "distillation_loss": 0.6741725206375122, "epoch": 2.26, "learning_rate": 4.3012116089039166e-05, "loss": 25.8688, "step": 2671, "task_loss": 0.24563604593276978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2361149780933135, "compression/movement_sparsity/importance_threshold": -0.005433589149033826, "compression/movement_sparsity/linear_layer_sparsity": 0.1052858809651364, "compression/movement_sparsity/model_sparsity": 0.1016689870567291, "compression_loss": 25.4288330078125, "distillation_loss": 0.4144495725631714, "epoch": 2.26, "learning_rate": 4.300741993049685e-05, "loss": 25.9188, "step": 2672, "task_loss": 1.4037030935287476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23682053687435844, "compression/movement_sparsity/importance_threshold": -0.005428570440161747, "compression/movement_sparsity/linear_layer_sparsity": 0.1059652604161897, "compression/movement_sparsity/model_sparsity": 0.10232502773362305, "compression_loss": 25.504440307617188, "distillation_loss": 0.6565845608711243, "epoch": 2.26, "learning_rate": 4.3002723771954546e-05, "loss": 26.0114, "step": 2673, "task_loss": 0.7949344515800476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23752566106423678, "compression/movement_sparsity/importance_threshold": -0.005423554822579299, "compression/movement_sparsity/linear_layer_sparsity": 0.10678662093128322, "compression/movement_sparsity/model_sparsity": 0.10311817198822261, "compression_loss": 25.579994201660156, "distillation_loss": 0.8904014825820923, "epoch": 2.26, "learning_rate": 4.2998027613412225e-05, "loss": 26.1857, "step": 2674, "task_loss": 0.7977390289306641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2382303507968342, "compression/movement_sparsity/importance_threshold": -0.005418542295334138, "compression/movement_sparsity/linear_layer_sparsity": 0.10743072869446962, "compression/movement_sparsity/model_sparsity": 0.10374015266823632, "compression_loss": 25.65550422668457, "distillation_loss": 0.3800717890262604, "epoch": 2.26, "learning_rate": 4.299333145486992e-05, "loss": 26.0805, "step": 2675, "task_loss": 0.9139372110366821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23893460620603502, "compression/movement_sparsity/importance_threshold": -0.005413532857473934, "compression/movement_sparsity/linear_layer_sparsity": 0.10815422756577561, "compression/movement_sparsity/model_sparsity": 0.10443879712757008, "compression_loss": 25.730960845947266, "distillation_loss": 0.7400597333908081, "epoch": 2.26, "learning_rate": 4.2988635296327605e-05, "loss": 26.5158, "step": 2676, "task_loss": 0.8119482398033142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23963842742572505, "compression/movement_sparsity/importance_threshold": -0.005408526508046344, "compression/movement_sparsity/linear_layer_sparsity": 0.1089723208589369, "compression/movement_sparsity/model_sparsity": 0.10522878639936194, "compression_loss": 25.80637550354004, "distillation_loss": 0.3702929615974426, "epoch": 2.26, "learning_rate": 4.29839391377853e-05, "loss": 26.1825, "step": 2677, "task_loss": 0.12539008259773254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2403418145897892, "compression/movement_sparsity/importance_threshold": -0.00540352324609903, "compression/movement_sparsity/linear_layer_sparsity": 0.1097415727614617, "compression/movement_sparsity/model_sparsity": 0.10597161213253935, "compression_loss": 25.881725311279297, "distillation_loss": 0.6799305081367493, "epoch": 2.26, "learning_rate": 4.2979242979242984e-05, "loss": 26.4246, "step": 2678, "task_loss": 0.798221230506897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2410447678321126, "compression/movement_sparsity/importance_threshold": -0.005398523070679655, "compression/movement_sparsity/linear_layer_sparsity": 0.11050471949015693, "compression/movement_sparsity/model_sparsity": 0.10670854242338997, "compression_loss": 25.957035064697266, "distillation_loss": 0.3962724208831787, "epoch": 2.26, "learning_rate": 4.297454682070067e-05, "loss": 26.6015, "step": 2679, "task_loss": 0.6219931244850159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24174728728658035, "compression/movement_sparsity/importance_threshold": -0.005393525980835881, "compression/movement_sparsity/linear_layer_sparsity": 0.11129840401216762, "compression/movement_sparsity/model_sparsity": 0.1074749614404104, "compression_loss": 26.032285690307617, "distillation_loss": 0.7264384031295776, "epoch": 2.27, "learning_rate": 4.296985066215836e-05, "loss": 26.643, "step": 2680, "task_loss": 1.0777698755264282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2424493730870777, "compression/movement_sparsity/importance_threshold": -0.005388531975615369, "compression/movement_sparsity/linear_layer_sparsity": 0.11198694122396526, "compression/movement_sparsity/model_sparsity": 0.10813984528079457, "compression_loss": 26.10748291015625, "distillation_loss": 0.5696560740470886, "epoch": 2.27, "learning_rate": 4.296515450361604e-05, "loss": 26.8049, "step": 2681, "task_loss": 0.700137734413147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24315102536748967, "compression/movement_sparsity/importance_threshold": -0.005383541054065781, "compression/movement_sparsity/linear_layer_sparsity": 0.1127289464034421, "compression/movement_sparsity/model_sparsity": 0.10885636029968146, "compression_loss": 26.182645797729492, "distillation_loss": 0.6613595485687256, "epoch": 2.27, "learning_rate": 4.2960458345073736e-05, "loss": 26.83, "step": 2682, "task_loss": 0.9302558898925781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2438522442617015, "compression/movement_sparsity/importance_threshold": -0.005378553215234779, "compression/movement_sparsity/linear_layer_sparsity": 0.11347071309956623, "compression/movement_sparsity/model_sparsity": 0.10957264502785245, "compression_loss": 26.25776481628418, "distillation_loss": 0.4168527126312256, "epoch": 2.27, "learning_rate": 4.2955762186531416e-05, "loss": 26.7118, "step": 2683, "task_loss": 1.0318697690963745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24455302990359828, "compression/movement_sparsity/importance_threshold": -0.005373568458170025, "compression/movement_sparsity/linear_layer_sparsity": 0.11415014024729006, "compression/movement_sparsity/model_sparsity": 0.1102287317628896, "compression_loss": 26.33281898498535, "distillation_loss": 0.9965804815292358, "epoch": 2.27, "learning_rate": 4.295106602798911e-05, "loss": 26.9454, "step": 2684, "task_loss": 0.7210372090339661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24525338242706485, "compression/movement_sparsity/importance_threshold": -0.005368586781919184, "compression/movement_sparsity/linear_layer_sparsity": 0.11484614198802776, "compression/movement_sparsity/model_sparsity": 0.11090082370268114, "compression_loss": 26.407865524291992, "distillation_loss": 0.4356876611709595, "epoch": 2.27, "learning_rate": 4.2946369869446796e-05, "loss": 26.8886, "step": 2685, "task_loss": 1.4216982126235962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24595330196598697, "compression/movement_sparsity/importance_threshold": -0.005363608185529911, "compression/movement_sparsity/linear_layer_sparsity": 0.11570736499552796, "compression/movement_sparsity/model_sparsity": 0.11173246105044185, "compression_loss": 26.482830047607422, "distillation_loss": 0.7411900758743286, "epoch": 2.27, "learning_rate": 4.294167371090448e-05, "loss": 27.0638, "step": 2686, "task_loss": 0.7104626893997192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24665278865424944, "compression/movement_sparsity/importance_threshold": -0.005358632668049872, "compression/movement_sparsity/linear_layer_sparsity": 0.11642474676883675, "compression/movement_sparsity/model_sparsity": 0.11242519855291301, "compression_loss": 26.557767868041992, "distillation_loss": 0.5689317584037781, "epoch": 2.27, "learning_rate": 4.293697755236217e-05, "loss": 27.2635, "step": 2687, "task_loss": 1.085319995880127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2473518426257375, "compression/movement_sparsity/importance_threshold": -0.0053536602285267285, "compression/movement_sparsity/linear_layer_sparsity": 0.11710399505404605, "compression/movement_sparsity/model_sparsity": 0.11308111256991324, "compression_loss": 26.63263702392578, "distillation_loss": 0.7402950525283813, "epoch": 2.27, "learning_rate": 4.2932281393819855e-05, "loss": 27.3796, "step": 2688, "task_loss": 1.2651280164718628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24805046401433617, "compression/movement_sparsity/importance_threshold": -0.005348690866008142, "compression/movement_sparsity/linear_layer_sparsity": 0.11764758208406213, "compression/movement_sparsity/model_sparsity": 0.11360602571317897, "compression_loss": 26.707536697387695, "distillation_loss": 0.32032763957977295, "epoch": 2.27, "learning_rate": 4.292758523527755e-05, "loss": 27.1941, "step": 2689, "task_loss": 0.11223143339157104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24874865295393067, "compression/movement_sparsity/importance_threshold": -0.005343724579541774, "compression/movement_sparsity/linear_layer_sparsity": 0.11833136155297307, "compression/movement_sparsity/model_sparsity": 0.11426631525378111, "compression_loss": 26.78236198425293, "distillation_loss": 0.6710376143455505, "epoch": 2.27, "learning_rate": 4.2922889076735234e-05, "loss": 27.4015, "step": 2690, "task_loss": 2.1001546382904053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24944640957840603, "compression/movement_sparsity/importance_threshold": -0.005338761368175288, "compression/movement_sparsity/linear_layer_sparsity": 0.11896479718612538, "compression/movement_sparsity/model_sparsity": 0.11487799042425871, "compression_loss": 26.857135772705078, "distillation_loss": 0.7020890712738037, "epoch": 2.27, "learning_rate": 4.291819291819292e-05, "loss": 27.4785, "step": 2691, "task_loss": 1.2456480264663696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2501437340216476, "compression/movement_sparsity/importance_threshold": -0.0053338012309563435, "compression/movement_sparsity/linear_layer_sparsity": 0.119688439147443, "compression/movement_sparsity/model_sparsity": 0.11557677305802201, "compression_loss": 26.931875228881836, "distillation_loss": 0.5488407015800476, "epoch": 2.28, "learning_rate": 4.291349675965061e-05, "loss": 27.5749, "step": 2692, "task_loss": 0.6112530827522278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25084062641754024, "compression/movement_sparsity/importance_threshold": -0.005328844166932605, "compression/movement_sparsity/linear_layer_sparsity": 0.12029286328073727, "compression/movement_sparsity/model_sparsity": 0.11616043336291149, "compression_loss": 27.006555557250977, "distillation_loss": 0.2784539461135864, "epoch": 2.28, "learning_rate": 4.290880060110829e-05, "loss": 27.4915, "step": 2693, "task_loss": 0.3147946298122406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25153708689996934, "compression/movement_sparsity/importance_threshold": -0.005323890175151732, "compression/movement_sparsity/linear_layer_sparsity": 0.12099041516326761, "compression/movement_sparsity/model_sparsity": 0.11683402219235633, "compression_loss": 27.081174850463867, "distillation_loss": 0.6261981129646301, "epoch": 2.28, "learning_rate": 4.2904104442565986e-05, "loss": 27.7263, "step": 2694, "task_loss": 1.0988560914993286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2522331156028199, "compression/movement_sparsity/importance_threshold": -0.005318939254661387, "compression/movement_sparsity/linear_layer_sparsity": 0.12175070009173024, "compression/movement_sparsity/model_sparsity": 0.11756818899461625, "compression_loss": 27.1557559967041, "distillation_loss": 0.5597829818725586, "epoch": 2.28, "learning_rate": 4.289940828402367e-05, "loss": 27.8083, "step": 2695, "task_loss": 0.7904340624809265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25292871265997685, "compression/movement_sparsity/importance_threshold": -0.005313991404509236, "compression/movement_sparsity/linear_layer_sparsity": 0.12242225728881441, "compression/movement_sparsity/model_sparsity": 0.118216676136029, "compression_loss": 27.230268478393555, "distillation_loss": 0.24966216087341309, "epoch": 2.28, "learning_rate": 4.289471212548136e-05, "loss": 27.728, "step": 2696, "task_loss": 1.009818196296692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25362387820532584, "compression/movement_sparsity/importance_threshold": -0.005309046623742934, "compression/movement_sparsity/linear_layer_sparsity": 0.12310036085393067, "compression/movement_sparsity/model_sparsity": 0.11887148475759295, "compression_loss": 27.304750442504883, "distillation_loss": 0.5341259241104126, "epoch": 2.28, "learning_rate": 4.2890015966939045e-05, "loss": 27.8853, "step": 2697, "task_loss": 0.5048677921295166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2543186123727518, "compression/movement_sparsity/importance_threshold": -0.005304104911410145, "compression/movement_sparsity/linear_layer_sparsity": 0.12372921760671081, "compression/movement_sparsity/model_sparsity": 0.11947873834632543, "compression_loss": 27.379161834716797, "distillation_loss": 0.44952309131622314, "epoch": 2.28, "learning_rate": 4.288531980839673e-05, "loss": 28.0252, "step": 2698, "task_loss": 0.16207964718341827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2550129152961398, "compression/movement_sparsity/importance_threshold": -0.005299166266558532, "compression/movement_sparsity/linear_layer_sparsity": 0.12457020530173295, "compression/movement_sparsity/model_sparsity": 0.1202908355268428, "compression_loss": 27.453542709350586, "distillation_loss": 1.234886646270752, "epoch": 2.28, "learning_rate": 4.2880623649854425e-05, "loss": 28.2268, "step": 2699, "task_loss": 2.1860523223876953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.255706787109375, "compression/movement_sparsity/importance_threshold": -0.005294230688235757, "compression/movement_sparsity/linear_layer_sparsity": 0.12519753576105572, "compression/movement_sparsity/model_sparsity": 0.1208966152549936, "compression_loss": 27.527854919433594, "distillation_loss": 0.42115283012390137, "epoch": 2.28, "learning_rate": 4.2875927491312104e-05, "loss": 28.0965, "step": 2700, "task_loss": 1.1207680702209473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25640022794634254, "compression/movement_sparsity/importance_threshold": -0.005289298175489482, "compression/movement_sparsity/linear_layer_sparsity": 0.1259484363695889, "compression/movement_sparsity/model_sparsity": 0.12162172011758321, "compression_loss": 27.60213851928711, "distillation_loss": 0.2963061034679413, "epoch": 2.28, "learning_rate": 4.28712313327698e-05, "loss": 27.9772, "step": 2701, "task_loss": 0.16318850219249725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25709323794092764, "compression/movement_sparsity/importance_threshold": -0.005284368727367366, "compression/movement_sparsity/linear_layer_sparsity": 0.12670088711991478, "compression/movement_sparsity/model_sparsity": 0.12234832186982612, "compression_loss": 27.676340103149414, "distillation_loss": 0.3478449881076813, "epoch": 2.28, "learning_rate": 4.2866535174227484e-05, "loss": 27.9935, "step": 2702, "task_loss": 0.5519226789474487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2577858172270152, "compression/movement_sparsity/importance_threshold": -0.0052794423429170755, "compression/movement_sparsity/linear_layer_sparsity": 0.12755278542832374, "compression/movement_sparsity/model_sparsity": 0.1231709548505955, "compression_loss": 27.750499725341797, "distillation_loss": 0.41684985160827637, "epoch": 2.28, "learning_rate": 4.286183901568518e-05, "loss": 28.286, "step": 2703, "task_loss": 0.7991785407066345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2584779659384907, "compression/movement_sparsity/importance_threshold": -0.005274519021186269, "compression/movement_sparsity/linear_layer_sparsity": 0.12828450005113082, "compression/movement_sparsity/model_sparsity": 0.1238775328250917, "compression_loss": 27.824583053588867, "distillation_loss": 0.6748076677322388, "epoch": 2.29, "learning_rate": 4.2857142857142856e-05, "loss": 28.5168, "step": 2704, "task_loss": 1.53501296043396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2591696842092388, "compression/movement_sparsity/importance_threshold": -0.005269598761222611, "compression/movement_sparsity/linear_layer_sparsity": 0.12904458226874366, "compression/movement_sparsity/model_sparsity": 0.12461150388024311, "compression_loss": 27.898637771606445, "distillation_loss": 0.5808929204940796, "epoch": 2.29, "learning_rate": 4.285244669860054e-05, "loss": 28.4766, "step": 2705, "task_loss": 0.6437965631484985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2598609721731452, "compression/movement_sparsity/importance_threshold": -0.005264681562073761, "compression/movement_sparsity/linear_layer_sparsity": 0.12987493360623462, "compression/movement_sparsity/model_sparsity": 0.12541333009483177, "compression_loss": 27.972620010375977, "distillation_loss": 0.36581847071647644, "epoch": 2.29, "learning_rate": 4.2847750540058236e-05, "loss": 28.3306, "step": 2706, "task_loss": 0.5201682448387146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26055182996409476, "compression/movement_sparsity/importance_threshold": -0.00525976742278738, "compression/movement_sparsity/linear_layer_sparsity": 0.13074490895277954, "compression/movement_sparsity/model_sparsity": 0.12625341911186566, "compression_loss": 28.046552658081055, "distillation_loss": 0.4081307351589203, "epoch": 2.29, "learning_rate": 4.284305438151592e-05, "loss": 28.6175, "step": 2707, "task_loss": 0.6570178866386414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26124225771597265, "compression/movement_sparsity/importance_threshold": -0.005254856342411133, "compression/movement_sparsity/linear_layer_sparsity": 0.13150807952981003, "compression/movement_sparsity/model_sparsity": 0.12699037243178787, "compression_loss": 28.12043571472168, "distillation_loss": 0.5787445306777954, "epoch": 2.29, "learning_rate": 4.283835822297361e-05, "loss": 28.8116, "step": 2708, "task_loss": 0.5056787729263306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2619322555626641, "compression/movement_sparsity/importance_threshold": -0.005249948319992679, "compression/movement_sparsity/linear_layer_sparsity": 0.13229260629107636, "compression/movement_sparsity/model_sparsity": 0.12774794828531807, "compression_loss": 28.19428062438965, "distillation_loss": 0.5046535730361938, "epoch": 2.29, "learning_rate": 4.2833662064431295e-05, "loss": 28.7526, "step": 2709, "task_loss": 0.5894928574562073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2626218236380541, "compression/movement_sparsity/importance_threshold": -0.005245043354579681, "compression/movement_sparsity/linear_layer_sparsity": 0.13310157759599622, "compression/movement_sparsity/model_sparsity": 0.12852912893722712, "compression_loss": 28.26807975769043, "distillation_loss": 0.37120693922042847, "epoch": 2.29, "learning_rate": 4.282896590588899e-05, "loss": 28.71, "step": 2710, "task_loss": 0.609689474105835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2633109620760279, "compression/movement_sparsity/importance_threshold": -0.0052401414452198005, "compression/movement_sparsity/linear_layer_sparsity": 0.13388327832953287, "compression/movement_sparsity/model_sparsity": 0.12928397584577403, "compression_loss": 28.341838836669922, "distillation_loss": 0.5352473258972168, "epoch": 2.29, "learning_rate": 4.2824269747346674e-05, "loss": 28.9188, "step": 2711, "task_loss": 0.9895550012588501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2639996710104705, "compression/movement_sparsity/importance_threshold": -0.005235242590960702, "compression/movement_sparsity/linear_layer_sparsity": 0.13461008019527398, "compression/movement_sparsity/model_sparsity": 0.12998580983152289, "compression_loss": 28.415552139282227, "distillation_loss": 0.5707123875617981, "epoch": 2.29, "learning_rate": 4.281957358880436e-05, "loss": 29.0422, "step": 2712, "task_loss": 0.700082540512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2646879505752672, "compression/movement_sparsity/importance_threshold": -0.005230346790850043, "compression/movement_sparsity/linear_layer_sparsity": 0.13544974430768855, "compression/movement_sparsity/model_sparsity": 0.13079662889856705, "compression_loss": 28.48921775817871, "distillation_loss": 0.6719260811805725, "epoch": 2.29, "learning_rate": 4.281487743026205e-05, "loss": 29.0436, "step": 2713, "task_loss": 0.24958136677742004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.265375800904303, "compression/movement_sparsity/importance_threshold": -0.005225454043935488, "compression/movement_sparsity/linear_layer_sparsity": 0.136231385420387, "compression/movement_sparsity/model_sparsity": 0.131551418234435, "compression_loss": 28.562854766845703, "distillation_loss": 0.6429877281188965, "epoch": 2.29, "learning_rate": 4.281018127171973e-05, "loss": 29.3475, "step": 2714, "task_loss": 0.8250919580459595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2660632221314633, "compression/movement_sparsity/importance_threshold": -0.005220564349264698, "compression/movement_sparsity/linear_layer_sparsity": 0.13690603097688886, "compression/movement_sparsity/model_sparsity": 0.13220288764061852, "compression_loss": 28.63644790649414, "distillation_loss": 0.5281400084495544, "epoch": 2.29, "learning_rate": 4.2805485113177426e-05, "loss": 29.172, "step": 2715, "task_loss": 0.8531211614608765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2667502143906326, "compression/movement_sparsity/importance_threshold": -0.005215677705885338, "compression/movement_sparsity/linear_layer_sparsity": 0.13765849365138236, "compression/movement_sparsity/model_sparsity": 0.13292950090739725, "compression_loss": 28.70996856689453, "distillation_loss": 0.7277277708053589, "epoch": 2.3, "learning_rate": 4.280078895463511e-05, "loss": 29.4415, "step": 2716, "task_loss": 1.0086910724639893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26743677781569686, "compression/movement_sparsity/importance_threshold": -0.005210794112845064, "compression/movement_sparsity/linear_layer_sparsity": 0.13839264080438718, "compression/movement_sparsity/model_sparsity": 0.1336384278471955, "compression_loss": 28.783451080322266, "distillation_loss": 0.4612288475036621, "epoch": 2.3, "learning_rate": 4.27960927960928e-05, "loss": 29.2845, "step": 2717, "task_loss": 0.4221337139606476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26812291254054077, "compression/movement_sparsity/importance_threshold": -0.005205913569191542, "compression/movement_sparsity/linear_layer_sparsity": 0.13908866639346013, "compression/movement_sparsity/model_sparsity": 0.13431054281605867, "compression_loss": 28.856895446777344, "distillation_loss": 0.7524464130401611, "epoch": 2.3, "learning_rate": 4.2791396637550485e-05, "loss": 29.4987, "step": 2718, "task_loss": 0.7221577167510986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2688086186990496, "compression/movement_sparsity/importance_threshold": -0.005201036073972433, "compression/movement_sparsity/linear_layer_sparsity": 0.13982115608716356, "compression/movement_sparsity/model_sparsity": 0.1350178692353815, "compression_loss": 28.930288314819336, "distillation_loss": 0.9604268074035645, "epoch": 2.3, "learning_rate": 4.278670047900817e-05, "loss": 29.5212, "step": 2719, "task_loss": 1.3540548086166382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26949389642510846, "compression/movement_sparsity/importance_threshold": -0.005196161626235398, "compression/movement_sparsity/linear_layer_sparsity": 0.14063928515282775, "compression/movement_sparsity/model_sparsity": 0.13580789305078075, "compression_loss": 29.00364112854004, "distillation_loss": 0.6855214834213257, "epoch": 2.3, "learning_rate": 4.2782004320465865e-05, "loss": 29.6872, "step": 2720, "task_loss": 0.65156489610672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2701787458526025, "compression/movement_sparsity/importance_threshold": -0.0051912902250281, "compression/movement_sparsity/linear_layer_sparsity": 0.14145797465437085, "compression/movement_sparsity/model_sparsity": 0.13659845804936233, "compression_loss": 29.076919555664062, "distillation_loss": 0.26088255643844604, "epoch": 2.3, "learning_rate": 4.2777308161923544e-05, "loss": 29.5317, "step": 2721, "task_loss": 0.2118864357471466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2708631671154169, "compression/movement_sparsity/importance_threshold": -0.0051864218693981985, "compression/movement_sparsity/linear_layer_sparsity": 0.14225809822690488, "compression/movement_sparsity/model_sparsity": 0.1373710949157118, "compression_loss": 29.150150299072266, "distillation_loss": 0.437964528799057, "epoch": 2.3, "learning_rate": 4.277261200338124e-05, "loss": 29.6106, "step": 2722, "task_loss": 0.20924249291419983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27154716034743664, "compression/movement_sparsity/importance_threshold": -0.005181556558393358, "compression/movement_sparsity/linear_layer_sparsity": 0.1430060058693615, "compression/movement_sparsity/model_sparsity": 0.138093309629817, "compression_loss": 29.22333526611328, "distillation_loss": 0.36936789751052856, "epoch": 2.3, "learning_rate": 4.2767915844838924e-05, "loss": 29.8959, "step": 2723, "task_loss": 0.6281954646110535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.272230725682547, "compression/movement_sparsity/importance_threshold": -0.00517669429106124, "compression/movement_sparsity/linear_layer_sparsity": 0.14385615132512797, "compression/movement_sparsity/model_sparsity": 0.1389142499738246, "compression_loss": 29.296463012695312, "distillation_loss": 0.6230946779251099, "epoch": 2.3, "learning_rate": 4.276321968629661e-05, "loss": 29.8794, "step": 2724, "task_loss": 0.3696346879005432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27291386325463274, "compression/movement_sparsity/importance_threshold": -0.005171835066449508, "compression/movement_sparsity/linear_layer_sparsity": 0.14477499191064466, "compression/movement_sparsity/model_sparsity": 0.1398015255585445, "compression_loss": 29.36955451965332, "distillation_loss": 0.6424458026885986, "epoch": 2.3, "learning_rate": 4.27585235277543e-05, "loss": 29.9051, "step": 2725, "task_loss": 0.5481419563293457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27359657319758, "compression/movement_sparsity/importance_threshold": -0.005166978883605817, "compression/movement_sparsity/linear_layer_sparsity": 0.1455566568716784, "compression/movement_sparsity/model_sparsity": 0.14055633792348404, "compression_loss": 29.44258689880371, "distillation_loss": 0.40198230743408203, "epoch": 2.3, "learning_rate": 4.275382736921198e-05, "loss": 29.8922, "step": 2726, "task_loss": 0.4201386868953705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27427885564527266, "compression/movement_sparsity/importance_threshold": -0.005162125741577838, "compression/movement_sparsity/linear_layer_sparsity": 0.14634098092209494, "compression/movement_sparsity/model_sparsity": 0.14131371802990575, "compression_loss": 29.515560150146484, "distillation_loss": 0.3371087908744812, "epoch": 2.3, "learning_rate": 4.2749131210669676e-05, "loss": 30.0168, "step": 2727, "task_loss": 0.5326480865478516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2749607107315968, "compression/movement_sparsity/importance_threshold": -0.005157275639413227, "compression/movement_sparsity/linear_layer_sparsity": 0.14705396267754609, "compression/movement_sparsity/model_sparsity": 0.14200220666866875, "compression_loss": 29.58849334716797, "distillation_loss": 0.6530739068984985, "epoch": 2.31, "learning_rate": 4.274443505212736e-05, "loss": 30.2532, "step": 2728, "task_loss": 0.6649225950241089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27564213859043707, "compression/movement_sparsity/importance_threshold": -0.005152428576159647, "compression/movement_sparsity/linear_layer_sparsity": 0.14779574129783785, "compression/movement_sparsity/model_sparsity": 0.14271850291137553, "compression_loss": 29.661375045776367, "distillation_loss": 0.3889024257659912, "epoch": 2.31, "learning_rate": 4.273973889358505e-05, "loss": 30.2171, "step": 2729, "task_loss": 0.6826656460762024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27632313935567887, "compression/movement_sparsity/importance_threshold": -0.0051475845508647585, "compression/movement_sparsity/linear_layer_sparsity": 0.14876704822095235, "compression/movement_sparsity/model_sparsity": 0.14365644245359144, "compression_loss": 29.73419952392578, "distillation_loss": 0.4738536775112152, "epoch": 2.31, "learning_rate": 4.2735042735042735e-05, "loss": 30.3281, "step": 2730, "task_loss": 0.43758463859558105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2770037131612072, "compression/movement_sparsity/importance_threshold": -0.005142743562576227, "compression/movement_sparsity/linear_layer_sparsity": 0.14961107657872164, "compression/movement_sparsity/model_sparsity": 0.1444714758407364, "compression_loss": 29.806997299194336, "distillation_loss": 0.9110275506973267, "epoch": 2.31, "learning_rate": 4.273034657650042e-05, "loss": 30.6162, "step": 2731, "task_loss": 0.8382347822189331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2776838601409073, "compression/movement_sparsity/importance_threshold": -0.005137905610341711, "compression/movement_sparsity/linear_layer_sparsity": 0.15036660376429756, "compression/movement_sparsity/model_sparsity": 0.14520104834321432, "compression_loss": 29.87975311279297, "distillation_loss": 0.631405234336853, "epoch": 2.31, "learning_rate": 4.2725650417958114e-05, "loss": 30.5794, "step": 2732, "task_loss": 0.40878942608833313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27836358042866427, "compression/movement_sparsity/importance_threshold": -0.005133070693208874, "compression/movement_sparsity/linear_layer_sparsity": 0.15098608812131592, "compression/movement_sparsity/model_sparsity": 0.1457992515068123, "compression_loss": 29.95246696472168, "distillation_loss": 0.4684833288192749, "epoch": 2.31, "learning_rate": 4.27209542594158e-05, "loss": 30.5487, "step": 2733, "task_loss": 0.6122934222221375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27904287415836315, "compression/movement_sparsity/importance_threshold": -0.0051282388102253775, "compression/movement_sparsity/linear_layer_sparsity": 0.15175519693382908, "compression/movement_sparsity/model_sparsity": 0.14654193906556018, "compression_loss": 30.025150299072266, "distillation_loss": 0.3875204622745514, "epoch": 2.31, "learning_rate": 4.271625810087349e-05, "loss": 30.547, "step": 2734, "task_loss": 0.3584195375442505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2797217414638892, "compression/movement_sparsity/importance_threshold": -0.005123409960438884, "compression/movement_sparsity/linear_layer_sparsity": 0.1526330541551813, "compression/movement_sparsity/model_sparsity": 0.1473896391907543, "compression_loss": 30.09777069091797, "distillation_loss": 0.285037636756897, "epoch": 2.31, "learning_rate": 4.2711561942331173e-05, "loss": 30.661, "step": 2735, "task_loss": 0.8624798655509949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2804001824791271, "compression/movement_sparsity/importance_threshold": -0.005118584142897056, "compression/movement_sparsity/linear_layer_sparsity": 0.15335959368923444, "compression/movement_sparsity/model_sparsity": 0.14809121985671564, "compression_loss": 30.17035675048828, "distillation_loss": 0.7073843479156494, "epoch": 2.31, "learning_rate": 4.270686578378886e-05, "loss": 30.9576, "step": 2736, "task_loss": 1.4000554084777832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2810781973379629, "compression/movement_sparsity/importance_threshold": -0.005113761356647551, "compression/movement_sparsity/linear_layer_sparsity": 0.15422062591005248, "compression/movement_sparsity/model_sparsity": 0.14892267297190365, "compression_loss": 30.24290657043457, "distillation_loss": 0.4953239858150482, "epoch": 2.31, "learning_rate": 4.270216962524655e-05, "loss": 30.7861, "step": 2737, "task_loss": 0.6268184185028076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2817557861742811, "compression/movement_sparsity/importance_threshold": -0.005108941600738036, "compression/movement_sparsity/linear_layer_sparsity": 0.15512137753326558, "compression/movement_sparsity/model_sparsity": 0.14979248100582326, "compression_loss": 30.315401077270508, "distillation_loss": 0.6597387790679932, "epoch": 2.31, "learning_rate": 4.269747346670423e-05, "loss": 30.9536, "step": 2738, "task_loss": 0.9142181873321533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28243294912196704, "compression/movement_sparsity/importance_threshold": -0.005104124874216171, "compression/movement_sparsity/linear_layer_sparsity": 0.15584810785400088, "compression/movement_sparsity/model_sparsity": 0.15049424590435734, "compression_loss": 30.387845993041992, "distillation_loss": 0.5772331357002258, "epoch": 2.32, "learning_rate": 4.2692777308161926e-05, "loss": 30.9652, "step": 2739, "task_loss": 0.9870017170906067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28310968631490585, "compression/movement_sparsity/importance_threshold": -0.005099311176129617, "compression/movement_sparsity/linear_layer_sparsity": 0.15670592054955723, "compression/movement_sparsity/model_sparsity": 0.1513225900948808, "compression_loss": 30.460256576538086, "distillation_loss": 0.7334739565849304, "epoch": 2.32, "learning_rate": 4.268808114961961e-05, "loss": 31.0782, "step": 2740, "task_loss": 1.5462909936904907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28378599788698256, "compression/movement_sparsity/importance_threshold": -0.005094500505526036, "compression/movement_sparsity/linear_layer_sparsity": 0.15739049893769974, "compression/movement_sparsity/model_sparsity": 0.1519836511093812, "compression_loss": 30.532608032226562, "distillation_loss": 0.8693815469741821, "epoch": 2.32, "learning_rate": 4.2683384991077305e-05, "loss": 31.2854, "step": 2741, "task_loss": 1.182247519493103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2844618839720825, "compression/movement_sparsity/importance_threshold": -0.005089692861453089, "compression/movement_sparsity/linear_layer_sparsity": 0.15824069209013678, "compression/movement_sparsity/model_sparsity": 0.15280463751153195, "compression_loss": 30.60493278503418, "distillation_loss": 0.3761720359325409, "epoch": 2.32, "learning_rate": 4.267868883253499e-05, "loss": 31.0153, "step": 2742, "task_loss": 0.44331586360931396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2851373447040906, "compression/movement_sparsity/importance_threshold": -0.005084888242958441, "compression/movement_sparsity/linear_layer_sparsity": 0.15895812156011613, "compression/movement_sparsity/model_sparsity": 0.15349742107214628, "compression_loss": 30.67719078063965, "distillation_loss": 0.6927005052566528, "epoch": 2.32, "learning_rate": 4.267399267399267e-05, "loss": 31.2148, "step": 2743, "task_loss": 2.1729400157928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28581238021689204, "compression/movement_sparsity/importance_threshold": -0.005080086649089753, "compression/movement_sparsity/linear_layer_sparsity": 0.15976418336813283, "compression/movement_sparsity/model_sparsity": 0.15427579217732146, "compression_loss": 30.74939727783203, "distillation_loss": 0.3620891571044922, "epoch": 2.32, "learning_rate": 4.2669296515450364e-05, "loss": 31.2201, "step": 2744, "task_loss": 0.23057834804058075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2864869906443719, "compression/movement_sparsity/importance_threshold": -0.005075288078894685, "compression/movement_sparsity/linear_layer_sparsity": 0.16057618341163218, "compression/movement_sparsity/model_sparsity": 0.1550598975213223, "compression_loss": 30.82154083251953, "distillation_loss": 0.5313894748687744, "epoch": 2.32, "learning_rate": 4.266460035690805e-05, "loss": 31.3277, "step": 2745, "task_loss": 0.6670314073562622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2871611761204158, "compression/movement_sparsity/importance_threshold": -0.005070492531420898, "compression/movement_sparsity/linear_layer_sparsity": 0.16126627076522249, "compression/movement_sparsity/model_sparsity": 0.15572627825135976, "compression_loss": 30.89363670349121, "distillation_loss": 0.636996328830719, "epoch": 2.32, "learning_rate": 4.2659904198365744e-05, "loss": 31.5297, "step": 2746, "task_loss": 0.6782141327857971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28783493677890815, "compression/movement_sparsity/importance_threshold": -0.005065700005716059, "compression/movement_sparsity/linear_layer_sparsity": 0.16194129789508868, "compression/movement_sparsity/model_sparsity": 0.1563781161226887, "compression_loss": 30.965700149536133, "distillation_loss": 1.1631801128387451, "epoch": 2.32, "learning_rate": 4.265520803982342e-05, "loss": 31.8414, "step": 2747, "task_loss": 1.0996763706207275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2885082727537346, "compression/movement_sparsity/importance_threshold": -0.005060910500827824, "compression/movement_sparsity/linear_layer_sparsity": 0.16268002392846567, "compression/movement_sparsity/model_sparsity": 0.1570914646442321, "compression_loss": 31.037704467773438, "distillation_loss": 0.760312020778656, "epoch": 2.32, "learning_rate": 4.2650511881281116e-05, "loss": 31.6361, "step": 2748, "task_loss": 0.567238986492157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28918118417878014, "compression/movement_sparsity/importance_threshold": -0.005056124015803859, "compression/movement_sparsity/linear_layer_sparsity": 0.16349220283447957, "compression/movement_sparsity/model_sparsity": 0.15787574270626986, "compression_loss": 31.10968780517578, "distillation_loss": 0.6885778307914734, "epoch": 2.32, "learning_rate": 4.26458157227388e-05, "loss": 31.7082, "step": 2749, "task_loss": 1.2327853441238403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28985367118793004, "compression/movement_sparsity/importance_threshold": -0.005051340549691821, "compression/movement_sparsity/linear_layer_sparsity": 0.1642373917667152, "compression/movement_sparsity/model_sparsity": 0.1585953321062139, "compression_loss": 31.181612014770508, "distillation_loss": 0.7615737915039062, "epoch": 2.32, "learning_rate": 4.264111956419649e-05, "loss": 31.7946, "step": 2750, "task_loss": 1.2553461790084839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2905257339150692, "compression/movement_sparsity/importance_threshold": -0.005046560101539378, "compression/movement_sparsity/linear_layer_sparsity": 0.16486911031972795, "compression/movement_sparsity/model_sparsity": 0.15920534918353707, "compression_loss": 31.253507614135742, "distillation_loss": 0.5104919075965881, "epoch": 2.33, "learning_rate": 4.2636423405654175e-05, "loss": 31.8202, "step": 2751, "task_loss": 0.7416309714317322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29119737249408295, "compression/movement_sparsity/importance_threshold": -0.005041782670394186, "compression/movement_sparsity/linear_layer_sparsity": 0.1656551991469546, "compression/movement_sparsity/model_sparsity": 0.15996443344125638, "compression_loss": 31.325355529785156, "distillation_loss": 0.7044447660446167, "epoch": 2.33, "learning_rate": 4.263172724711186e-05, "loss": 31.9358, "step": 2752, "task_loss": 0.7137342691421509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2918685870588563, "compression/movement_sparsity/importance_threshold": -0.0050370082553039125, "compression/movement_sparsity/linear_layer_sparsity": 0.16650533267855344, "compression/movement_sparsity/model_sparsity": 0.16078536227072818, "compression_loss": 31.397159576416016, "distillation_loss": 0.35591205954551697, "epoch": 2.33, "learning_rate": 4.2627031088569555e-05, "loss": 31.9396, "step": 2753, "task_loss": 0.5174103379249573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29253937774327454, "compression/movement_sparsity/importance_threshold": -0.005032236855316214, "compression/movement_sparsity/linear_layer_sparsity": 0.16736960827296843, "compression/movement_sparsity/model_sparsity": 0.1616199473396523, "compression_loss": 31.468902587890625, "distillation_loss": 0.3430395722389221, "epoch": 2.33, "learning_rate": 4.262233493002724e-05, "loss": 32.0252, "step": 2754, "task_loss": 0.3031763732433319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29320974468122263, "compression/movement_sparsity/importance_threshold": -0.005027468469478756, "compression/movement_sparsity/linear_layer_sparsity": 0.16824246926808123, "compression/movement_sparsity/model_sparsity": 0.16246282287434846, "compression_loss": 31.54060173034668, "distillation_loss": 0.7164740562438965, "epoch": 2.33, "learning_rate": 4.261763877148493e-05, "loss": 32.2002, "step": 2755, "task_loss": 1.1627956628799438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2938796880065856, "compression/movement_sparsity/importance_threshold": -0.005022703096839202, "compression/movement_sparsity/linear_layer_sparsity": 0.16896922343715182, "compression/movement_sparsity/model_sparsity": 0.16316461080195413, "compression_loss": 31.61226463317871, "distillation_loss": 0.5962386727333069, "epoch": 2.33, "learning_rate": 4.2612942612942614e-05, "loss": 32.2875, "step": 2756, "task_loss": 0.8632254600524902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.294549207853249, "compression/movement_sparsity/importance_threshold": -0.005017940736445209, "compression/movement_sparsity/linear_layer_sparsity": 0.1698230534607178, "compression/movement_sparsity/model_sparsity": 0.16398910913752224, "compression_loss": 31.683870315551758, "distillation_loss": 0.8112803101539612, "epoch": 2.33, "learning_rate": 4.26082464544003e-05, "loss": 32.3719, "step": 2757, "task_loss": 2.0563156604766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29521830435509777, "compression/movement_sparsity/importance_threshold": -0.005013181387344441, "compression/movement_sparsity/linear_layer_sparsity": 0.1705776505612181, "compression/movement_sparsity/model_sparsity": 0.16471778350620817, "compression_loss": 31.755451202392578, "distillation_loss": 0.9084860682487488, "epoch": 2.33, "learning_rate": 4.260355029585799e-05, "loss": 32.9398, "step": 2758, "task_loss": 1.018105149269104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.295886977646017, "compression/movement_sparsity/importance_threshold": -0.0050084250485845595, "compression/movement_sparsity/linear_layer_sparsity": 0.1712156650747426, "compression/movement_sparsity/model_sparsity": 0.16533388025843085, "compression_loss": 31.827001571655273, "distillation_loss": 1.4098728895187378, "epoch": 2.33, "learning_rate": 4.259885413731568e-05, "loss": 32.6939, "step": 2759, "task_loss": 2.114043712615967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29655522785989197, "compression/movement_sparsity/importance_threshold": -0.005003671719213227, "compression/movement_sparsity/linear_layer_sparsity": 0.17183554292929293, "compression/movement_sparsity/model_sparsity": 0.16593246340171006, "compression_loss": 31.89850425720215, "distillation_loss": 0.6526762843132019, "epoch": 2.33, "learning_rate": 4.2594157978773366e-05, "loss": 32.5321, "step": 2760, "task_loss": 1.6808511018753052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29722305513060765, "compression/movement_sparsity/importance_threshold": -0.004998921398278104, "compression/movement_sparsity/linear_layer_sparsity": 0.17243061851516067, "compression/movement_sparsity/model_sparsity": 0.16650709631053662, "compression_loss": 31.969995498657227, "distillation_loss": 0.48595544695854187, "epoch": 2.33, "learning_rate": 4.258946182023105e-05, "loss": 32.6616, "step": 2761, "task_loss": 0.1865263730287552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2978904595920493, "compression/movement_sparsity/importance_threshold": -0.004994174084826854, "compression/movement_sparsity/linear_layer_sparsity": 0.1731237703798334, "compression/movement_sparsity/model_sparsity": 0.16717643627627327, "compression_loss": 32.041419982910156, "distillation_loss": 1.0387502908706665, "epoch": 2.33, "learning_rate": 4.258476566168874e-05, "loss": 32.7551, "step": 2762, "task_loss": 1.9528183937072754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29855744137810203, "compression/movement_sparsity/importance_threshold": -0.004989429777907138, "compression/movement_sparsity/linear_layer_sparsity": 0.17389935401537282, "compression/movement_sparsity/model_sparsity": 0.16792537622795758, "compression_loss": 32.11284637451172, "distillation_loss": 0.5862445831298828, "epoch": 2.34, "learning_rate": 4.258006950314643e-05, "loss": 32.8377, "step": 2763, "task_loss": 0.391838937997818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.299224000622651, "compression/movement_sparsity/importance_threshold": -0.004984688476566618, "compression/movement_sparsity/linear_layer_sparsity": 0.1745513317291989, "compression/movement_sparsity/model_sparsity": 0.16855495650159572, "compression_loss": 32.1842041015625, "distillation_loss": 1.0807204246520996, "epoch": 2.34, "learning_rate": 4.257537334460411e-05, "loss": 32.9268, "step": 2764, "task_loss": 0.592423677444458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29989013745958093, "compression/movement_sparsity/importance_threshold": -0.004979950179852958, "compression/movement_sparsity/linear_layer_sparsity": 0.17521851275676073, "compression/movement_sparsity/model_sparsity": 0.16919921780837185, "compression_loss": 32.25554656982422, "distillation_loss": 0.526317298412323, "epoch": 2.34, "learning_rate": 4.2570677186061804e-05, "loss": 32.8347, "step": 2765, "task_loss": 0.6176579594612122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.300555852022778, "compression/movement_sparsity/importance_threshold": -0.004975214886813814, "compression/movement_sparsity/linear_layer_sparsity": 0.1758458551402511, "compression/movement_sparsity/model_sparsity": 0.16980500905105844, "compression_loss": 32.32683563232422, "distillation_loss": 0.8739572763442993, "epoch": 2.34, "learning_rate": 4.256598102751949e-05, "loss": 33.2407, "step": 2766, "task_loss": 0.5652173161506653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30122114444612613, "compression/movement_sparsity/importance_threshold": -0.004970482596496855, "compression/movement_sparsity/linear_layer_sparsity": 0.17652202699021036, "compression/movement_sparsity/model_sparsity": 0.17045795231782365, "compression_loss": 32.398075103759766, "distillation_loss": 1.167102575302124, "epoch": 2.34, "learning_rate": 4.256128486897718e-05, "loss": 33.349, "step": 2767, "task_loss": 1.3371820449829102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3018860148635112, "compression/movement_sparsity/importance_threshold": -0.0049657533079497385, "compression/movement_sparsity/linear_layer_sparsity": 0.17714058126215312, "compression/movement_sparsity/model_sparsity": 0.17105525734762966, "compression_loss": 32.46928405761719, "distillation_loss": 0.7129797339439392, "epoch": 2.34, "learning_rate": 4.255658871043486e-05, "loss": 33.151, "step": 2768, "task_loss": 0.1442427933216095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3025504634088183, "compression/movement_sparsity/importance_threshold": -0.004961027020220127, "compression/movement_sparsity/linear_layer_sparsity": 0.17779098498585125, "compression/movement_sparsity/model_sparsity": 0.17168331770254291, "compression_loss": 32.54045486450195, "distillation_loss": 0.4430408477783203, "epoch": 2.34, "learning_rate": 4.255189255189255e-05, "loss": 33.4072, "step": 2769, "task_loss": 0.40197616815567017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3032144902159324, "compression/movement_sparsity/importance_threshold": -0.004956303732355682, "compression/movement_sparsity/linear_layer_sparsity": 0.17843205208629054, "compression/movement_sparsity/model_sparsity": 0.172302362175929, "compression_loss": 32.611595153808594, "distillation_loss": 0.8200084567070007, "epoch": 2.34, "learning_rate": 4.254719639335024e-05, "loss": 33.3438, "step": 2770, "task_loss": 0.9930720329284668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30387809541873867, "compression/movement_sparsity/importance_threshold": -0.004951583443404067, "compression/movement_sparsity/linear_layer_sparsity": 0.17907636256032675, "compression/movement_sparsity/model_sparsity": 0.1729245386030512, "compression_loss": 32.68266677856445, "distillation_loss": 0.7819734811782837, "epoch": 2.34, "learning_rate": 4.254250023480793e-05, "loss": 33.3746, "step": 2771, "task_loss": 0.6970024704933167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3045412791511223, "compression/movement_sparsity/importance_threshold": -0.004946866152412943, "compression/movement_sparsity/linear_layer_sparsity": 0.17969014716521517, "compression/movement_sparsity/model_sparsity": 0.17351723781853942, "compression_loss": 32.75369644165039, "distillation_loss": 0.7925294041633606, "epoch": 2.34, "learning_rate": 4.253780407626562e-05, "loss": 33.474, "step": 2772, "task_loss": 0.33001670241355896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30520404154696845, "compression/movement_sparsity/importance_threshold": -0.00494215185842997, "compression/movement_sparsity/linear_layer_sparsity": 0.18045656111584263, "compression/movement_sparsity/model_sparsity": 0.17425732309219774, "compression_loss": 32.82465362548828, "distillation_loss": 0.3199234902858734, "epoch": 2.34, "learning_rate": 4.25331079177233e-05, "loss": 33.3113, "step": 2773, "task_loss": 0.7486782073974609 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3058663827401621, "compression/movement_sparsity/importance_threshold": -0.004937440560502814, "compression/movement_sparsity/linear_layer_sparsity": 0.1810549039236426, "compression/movement_sparsity/model_sparsity": 0.174835110983832, "compression_loss": 32.89556121826172, "distillation_loss": 0.8446411490440369, "epoch": 2.34, "learning_rate": 4.2528411759180995e-05, "loss": 33.592, "step": 2774, "task_loss": 0.7513561844825745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30652830286458854, "compression/movement_sparsity/importance_threshold": -0.004932732257679132, "compression/movement_sparsity/linear_layer_sparsity": 0.1817724884078012, "compression/movement_sparsity/model_sparsity": 0.17552804423341167, "compression_loss": 32.96642303466797, "distillation_loss": 0.7196393013000488, "epoch": 2.35, "learning_rate": 4.252371560063868e-05, "loss": 33.6426, "step": 2775, "task_loss": 0.33948174118995667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3071898020541327, "compression/movement_sparsity/importance_threshold": -0.0049280269490065915, "compression/movement_sparsity/linear_layer_sparsity": 0.18241526066421238, "compression/movement_sparsity/model_sparsity": 0.17614873528541639, "compression_loss": 33.0372314453125, "distillation_loss": 0.751387894153595, "epoch": 2.35, "learning_rate": 4.251901944209637e-05, "loss": 33.8649, "step": 2776, "task_loss": 0.9632173776626587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30785088044268005, "compression/movement_sparsity/importance_threshold": -0.004923324633532849, "compression/movement_sparsity/linear_layer_sparsity": 0.18306108550753836, "compression/movement_sparsity/model_sparsity": 0.1767723740585845, "compression_loss": 33.108001708984375, "distillation_loss": 0.7700991034507751, "epoch": 2.35, "learning_rate": 4.2514323283554054e-05, "loss": 33.7019, "step": 2777, "task_loss": 0.7082532048225403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3085115381641157, "compression/movement_sparsity/importance_threshold": -0.004918625310305569, "compression/movement_sparsity/linear_layer_sparsity": 0.1836227376515228, "compression/movement_sparsity/model_sparsity": 0.17731473172357898, "compression_loss": 33.17872619628906, "distillation_loss": 0.6718583106994629, "epoch": 2.35, "learning_rate": 4.250962712501174e-05, "loss": 33.7763, "step": 2778, "task_loss": 0.2824065089225769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30917177535232454, "compression/movement_sparsity/importance_threshold": -0.004913928978372412, "compression/movement_sparsity/linear_layer_sparsity": 0.18427292674020349, "compression/movement_sparsity/model_sparsity": 0.17794258481684788, "compression_loss": 33.24942398071289, "distillation_loss": 0.46940702199935913, "epoch": 2.35, "learning_rate": 4.2504930966469433e-05, "loss": 33.9225, "step": 2779, "task_loss": 0.4070832133293152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3098315921411918, "compression/movement_sparsity/importance_threshold": -0.004909235636781042, "compression/movement_sparsity/linear_layer_sparsity": 0.18481341348663424, "compression/movement_sparsity/model_sparsity": 0.17846450418080703, "compression_loss": 33.320064544677734, "distillation_loss": 0.36373090744018555, "epoch": 2.35, "learning_rate": 4.250023480792712e-05, "loss": 33.9599, "step": 2780, "task_loss": 0.4446256458759308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3104909886646028, "compression/movement_sparsity/importance_threshold": -0.004904545284579118, "compression/movement_sparsity/linear_layer_sparsity": 0.18540888257003396, "compression/movement_sparsity/model_sparsity": 0.1790395170693148, "compression_loss": 33.39065170288086, "distillation_loss": 0.9020224809646606, "epoch": 2.35, "learning_rate": 4.2495538649384806e-05, "loss": 34.0524, "step": 2781, "task_loss": 1.6108921766281128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31114996505644243, "compression/movement_sparsity/importance_threshold": -0.004899857920814304, "compression/movement_sparsity/linear_layer_sparsity": 0.18612340254311016, "compression/movement_sparsity/model_sparsity": 0.1797294910831953, "compression_loss": 33.46118927001953, "distillation_loss": 0.643280029296875, "epoch": 2.35, "learning_rate": 4.249084249084249e-05, "loss": 34.0833, "step": 2782, "task_loss": 1.129131555557251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.311808521450596, "compression/movement_sparsity/importance_threshold": -0.0048951735445342615, "compression/movement_sparsity/linear_layer_sparsity": 0.18676158399498155, "compression/movement_sparsity/model_sparsity": 0.18034574903891912, "compression_loss": 33.53169250488281, "distillation_loss": 1.1293323040008545, "epoch": 2.35, "learning_rate": 4.248614633230018e-05, "loss": 34.2809, "step": 2783, "task_loss": 1.2853496074676514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31246665798094864, "compression/movement_sparsity/importance_threshold": -0.004890492154786651, "compression/movement_sparsity/linear_layer_sparsity": 0.18747902538912853, "compression/movement_sparsity/model_sparsity": 0.18103854411406925, "compression_loss": 33.602169036865234, "distillation_loss": 0.5802097320556641, "epoch": 2.35, "learning_rate": 4.248145017375787e-05, "loss": 34.2198, "step": 2784, "task_loss": 0.7919918894767761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3131243747813851, "compression/movement_sparsity/importance_threshold": -0.004885813750619139, "compression/movement_sparsity/linear_layer_sparsity": 0.18821493731894343, "compression/movement_sparsity/model_sparsity": 0.18174917520516515, "compression_loss": 33.672584533691406, "distillation_loss": 0.8476006388664246, "epoch": 2.35, "learning_rate": 4.247675401521555e-05, "loss": 34.4044, "step": 2785, "task_loss": 0.7706384658813477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3137816719857913, "compression/movement_sparsity/importance_threshold": -0.004881138331079379, "compression/movement_sparsity/linear_layer_sparsity": 0.18878290927177488, "compression/movement_sparsity/model_sparsity": 0.1822976355741307, "compression_loss": 33.7429313659668, "distillation_loss": 0.90858393907547, "epoch": 2.35, "learning_rate": 4.2472057856673245e-05, "loss": 34.5114, "step": 2786, "task_loss": 1.2514575719833374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3144385497280515, "compression/movement_sparsity/importance_threshold": -0.004876465895215043, "compression/movement_sparsity/linear_layer_sparsity": 0.18944988758848685, "compression/movement_sparsity/model_sparsity": 0.18294170113379835, "compression_loss": 33.8132438659668, "distillation_loss": 0.8140503168106079, "epoch": 2.36, "learning_rate": 4.246736169813093e-05, "loss": 34.4988, "step": 2787, "task_loss": 0.8396199345588684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3150950081420516, "compression/movement_sparsity/importance_threshold": -0.004871796442073783, "compression/movement_sparsity/linear_layer_sparsity": 0.19030790299489303, "compression/movement_sparsity/model_sparsity": 0.1837702410714303, "compression_loss": 33.88349533081055, "distillation_loss": 0.7679473161697388, "epoch": 2.36, "learning_rate": 4.246266553958862e-05, "loss": 34.557, "step": 2788, "task_loss": 0.7439993023872375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31575104736167636, "compression/movement_sparsity/importance_threshold": -0.004867129970703267, "compression/movement_sparsity/linear_layer_sparsity": 0.19092146104059635, "compression/movement_sparsity/model_sparsity": 0.1843627215107384, "compression_loss": 33.95370864868164, "distillation_loss": 0.8671605587005615, "epoch": 2.36, "learning_rate": 4.245796938104631e-05, "loss": 34.7112, "step": 2789, "task_loss": 1.0245537757873535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31640666752081104, "compression/movement_sparsity/importance_threshold": -0.004862466480151154, "compression/movement_sparsity/linear_layer_sparsity": 0.19158086751085956, "compression/movement_sparsity/model_sparsity": 0.18499947534017652, "compression_loss": 34.02387619018555, "distillation_loss": 0.5069687366485596, "epoch": 2.36, "learning_rate": 4.245327322250399e-05, "loss": 34.6788, "step": 2790, "task_loss": 0.8868238925933838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31706186875334064, "compression/movement_sparsity/importance_threshold": -0.004857805969465109, "compression/movement_sparsity/linear_layer_sparsity": 0.19225913801432273, "compression/movement_sparsity/model_sparsity": 0.1856544451652416, "compression_loss": 34.0940055847168, "distillation_loss": 0.503166913986206, "epoch": 2.36, "learning_rate": 4.244857706396168e-05, "loss": 34.5817, "step": 2791, "task_loss": 0.1901887059211731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3177166511931504, "compression/movement_sparsity/importance_threshold": -0.004853148437692791, "compression/movement_sparsity/linear_layer_sparsity": 0.1929749457975036, "compression/movement_sparsity/model_sparsity": 0.18634566274898787, "compression_loss": 34.1640625, "distillation_loss": 0.5947093963623047, "epoch": 2.36, "learning_rate": 4.244388090541937e-05, "loss": 34.7604, "step": 2792, "task_loss": 0.2938380539417267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31837101497412545, "compression/movement_sparsity/importance_threshold": -0.004848493883881862, "compression/movement_sparsity/linear_layer_sparsity": 0.19372629952440695, "compression/movement_sparsity/model_sparsity": 0.1870712051639377, "compression_loss": 34.23403549194336, "distillation_loss": 0.6587586998939514, "epoch": 2.36, "learning_rate": 4.2439184746877056e-05, "loss": 34.8707, "step": 2793, "task_loss": 1.0960184335708618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3190249602301509, "compression/movement_sparsity/importance_threshold": -0.004843842307079986, "compression/movement_sparsity/linear_layer_sparsity": 0.19446485861943705, "compression/movement_sparsity/model_sparsity": 0.18778439248197995, "compression_loss": 34.30398941040039, "distillation_loss": 0.48593980073928833, "epoch": 2.36, "learning_rate": 4.243448858833474e-05, "loss": 34.9568, "step": 2794, "task_loss": 1.3768370151519775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31967848709511204, "compression/movement_sparsity/importance_threshold": -0.0048391937063348225, "compression/movement_sparsity/linear_layer_sparsity": 0.195005011489174, "compression/movement_sparsity/model_sparsity": 0.18830598943893687, "compression_loss": 34.37389373779297, "distillation_loss": 0.5051955580711365, "epoch": 2.36, "learning_rate": 4.242979242979243e-05, "loss": 35.028, "step": 2795, "task_loss": 0.10261370986700058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3203315957028935, "compression/movement_sparsity/importance_threshold": -0.004834548080694037, "compression/movement_sparsity/linear_layer_sparsity": 0.1957210935282105, "compression/movement_sparsity/model_sparsity": 0.1889974718570064, "compression_loss": 34.44375991821289, "distillation_loss": 1.163475513458252, "epoch": 2.36, "learning_rate": 4.242509627125012e-05, "loss": 35.2886, "step": 2796, "task_loss": 1.0616337060928345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3209842861873812, "compression/movement_sparsity/importance_threshold": -0.004829905429205285, "compression/movement_sparsity/linear_layer_sparsity": 0.1964967010120852, "compression/movement_sparsity/model_sparsity": 0.18974643483776232, "compression_loss": 34.513572692871094, "distillation_loss": 0.5180312991142273, "epoch": 2.36, "learning_rate": 4.242040011270781e-05, "loss": 35.129, "step": 2797, "task_loss": 0.6302485466003418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32163655868245977, "compression/movement_sparsity/importance_threshold": -0.004825265750916233, "compression/movement_sparsity/linear_layer_sparsity": 0.19717633087065883, "compression/movement_sparsity/model_sparsity": 0.19040271731990796, "compression_loss": 34.58334732055664, "distillation_loss": 0.7106043100357056, "epoch": 2.36, "learning_rate": 4.2415703954165494e-05, "loss": 35.1452, "step": 2798, "task_loss": 0.28174299001693726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3222884133220144, "compression/movement_sparsity/importance_threshold": -0.004820629044874543, "compression/movement_sparsity/linear_layer_sparsity": 0.19775350828090515, "compression/movement_sparsity/model_sparsity": 0.19096006691050693, "compression_loss": 34.65303421020508, "distillation_loss": 0.5265079736709595, "epoch": 2.37, "learning_rate": 4.241100779562318e-05, "loss": 35.1456, "step": 2799, "task_loss": 0.9019681811332703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32293985023993044, "compression/movement_sparsity/importance_threshold": -0.004815995310127875, "compression/movement_sparsity/linear_layer_sparsity": 0.19856894248468365, "compression/movement_sparsity/model_sparsity": 0.1917474884408166, "compression_loss": 34.7226676940918, "distillation_loss": 0.894328773021698, "epoch": 2.37, "learning_rate": 4.240631163708087e-05, "loss": 35.3802, "step": 2800, "task_loss": 0.6517687439918518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32359086957009287, "compression/movement_sparsity/importance_threshold": -0.004811364545723891, "compression/movement_sparsity/linear_layer_sparsity": 0.19947881609613818, "compression/movement_sparsity/model_sparsity": 0.19262610509461903, "compression_loss": 34.79227828979492, "distillation_loss": 0.5099529027938843, "epoch": 2.37, "learning_rate": 4.240161547853856e-05, "loss": 35.2825, "step": 2801, "task_loss": 1.2869151830673218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3242414714463868, "compression/movement_sparsity/importance_threshold": -0.004806736750710253, "compression/movement_sparsity/linear_layer_sparsity": 0.20031886178191707, "compression/movement_sparsity/model_sparsity": 0.19343729262680864, "compression_loss": 34.861846923828125, "distillation_loss": 0.8410700559616089, "epoch": 2.37, "learning_rate": 4.2396919319996246e-05, "loss": 35.4882, "step": 2802, "task_loss": 0.828881561756134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3248916560026974, "compression/movement_sparsity/importance_threshold": -0.004802111924134625, "compression/movement_sparsity/linear_layer_sparsity": 0.20109116627135667, "compression/movement_sparsity/model_sparsity": 0.19418306608114946, "compression_loss": 34.93134307861328, "distillation_loss": 1.0441285371780396, "epoch": 2.37, "learning_rate": 4.239222316145393e-05, "loss": 35.6953, "step": 2803, "task_loss": 0.6659601330757141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3255414233729099, "compression/movement_sparsity/importance_threshold": -0.0047974900650446664, "compression/movement_sparsity/linear_layer_sparsity": 0.2017171612239042, "compression/movement_sparsity/model_sparsity": 0.19478755618129126, "compression_loss": 35.00080871582031, "distillation_loss": 0.4531797766685486, "epoch": 2.37, "learning_rate": 4.238752700291162e-05, "loss": 35.7957, "step": 2804, "task_loss": 0.8206874132156372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3261907736909093, "compression/movement_sparsity/importance_threshold": -0.00479287117248804, "compression/movement_sparsity/linear_layer_sparsity": 0.20249883810910557, "compression/movement_sparsity/model_sparsity": 0.19554238006076657, "compression_loss": 35.07024383544922, "distillation_loss": 0.7265602350234985, "epoch": 2.37, "learning_rate": 4.238283084436931e-05, "loss": 35.7535, "step": 2805, "task_loss": 1.8234038352966309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32683970709058086, "compression/movement_sparsity/importance_threshold": -0.004788255245512407, "compression/movement_sparsity/linear_layer_sparsity": 0.20329577792888082, "compression/movement_sparsity/model_sparsity": 0.1963119425460589, "compression_loss": 35.13960647583008, "distillation_loss": 0.5765610337257385, "epoch": 2.37, "learning_rate": 4.2378134685827e-05, "loss": 35.8149, "step": 2806, "task_loss": 0.23761634528636932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32748822370580943, "compression/movement_sparsity/importance_threshold": -0.0047836422831654315, "compression/movement_sparsity/linear_layer_sparsity": 0.20408332150455905, "compression/movement_sparsity/model_sparsity": 0.19707243157714516, "compression_loss": 35.208961486816406, "distillation_loss": 0.5704872608184814, "epoch": 2.37, "learning_rate": 4.237343852728468e-05, "loss": 35.7968, "step": 2807, "task_loss": 0.8374197483062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3281363236704806, "compression/movement_sparsity/importance_threshold": -0.004779032284494772, "compression/movement_sparsity/linear_layer_sparsity": 0.20489226896114363, "compression/movement_sparsity/model_sparsity": 0.1978535891999826, "compression_loss": 35.27824020385742, "distillation_loss": 0.5952828526496887, "epoch": 2.37, "learning_rate": 4.236874236874237e-05, "loss": 35.9671, "step": 2808, "task_loss": 0.9903055429458618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3287840071184793, "compression/movement_sparsity/importance_threshold": -0.004774425248548092, "compression/movement_sparsity/linear_layer_sparsity": 0.20556560285920553, "compression/movement_sparsity/model_sparsity": 0.19850379200722873, "compression_loss": 35.34748077392578, "distillation_loss": 0.9510096311569214, "epoch": 2.37, "learning_rate": 4.236404621020006e-05, "loss": 36.1168, "step": 2809, "task_loss": 0.7699193358421326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3294312741836908, "compression/movement_sparsity/importance_threshold": -0.004769821174373053, "compression/movement_sparsity/linear_layer_sparsity": 0.2062986052921173, "compression/movement_sparsity/model_sparsity": 0.19921161355159075, "compression_loss": 35.41666030883789, "distillation_loss": 0.8322913646697998, "epoch": 2.38, "learning_rate": 4.235935005165775e-05, "loss": 36.176, "step": 2810, "task_loss": 0.7650166749954224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.330078125, "compression/movement_sparsity/importance_threshold": -0.004765220061017317, "compression/movement_sparsity/linear_layer_sparsity": 0.20706193088332708, "compression/movement_sparsity/model_sparsity": 0.19994871656047827, "compression_loss": 35.48580551147461, "distillation_loss": 0.5221373438835144, "epoch": 2.38, "learning_rate": 4.235465389311543e-05, "loss": 36.1331, "step": 2811, "task_loss": 1.115736722946167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3307245597012922, "compression/movement_sparsity/importance_threshold": -0.004760621907528545, "compression/movement_sparsity/linear_layer_sparsity": 0.20795502719091793, "compression/movement_sparsity/model_sparsity": 0.20081113226241779, "compression_loss": 35.55485916137695, "distillation_loss": 0.6537224054336548, "epoch": 2.38, "learning_rate": 4.234995773457312e-05, "loss": 36.1572, "step": 2812, "task_loss": 0.8449608087539673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33137057842145246, "compression/movement_sparsity/importance_threshold": -0.004756026712954402, "compression/movement_sparsity/linear_layer_sparsity": 0.20882410822489017, "compression/movement_sparsity/model_sparsity": 0.2016503576892671, "compression_loss": 35.6238899230957, "distillation_loss": 0.347369909286499, "epoch": 2.38, "learning_rate": 4.234526157603081e-05, "loss": 36.1757, "step": 2813, "task_loss": 0.8491607308387756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33201618129436594, "compression/movement_sparsity/importance_threshold": -0.004751434476342546, "compression/movement_sparsity/linear_layer_sparsity": 0.20958421429083826, "compression/movement_sparsity/model_sparsity": 0.2023843517734901, "compression_loss": 35.69287109375, "distillation_loss": 0.5068443417549133, "epoch": 2.38, "learning_rate": 4.2340565417488496e-05, "loss": 36.2428, "step": 2814, "task_loss": 0.2485508918762207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3326613684539179, "compression/movement_sparsity/importance_threshold": -0.004746845196740641, "compression/movement_sparsity/linear_layer_sparsity": 0.21043681612513776, "compression/movement_sparsity/model_sparsity": 0.20320766411187136, "compression_loss": 35.761783599853516, "distillation_loss": 0.9379880428314209, "epoch": 2.38, "learning_rate": 4.233586925894618e-05, "loss": 36.5238, "step": 2815, "task_loss": 1.0784682035446167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33330614003399306, "compression/movement_sparsity/importance_threshold": -0.0047422588731963495, "compression/movement_sparsity/linear_layer_sparsity": 0.21126143193799585, "compression/movement_sparsity/model_sparsity": 0.20400395183474282, "compression_loss": 35.830650329589844, "distillation_loss": 0.9720501899719238, "epoch": 2.38, "learning_rate": 4.233117310040387e-05, "loss": 36.6423, "step": 2816, "task_loss": 0.9821082949638367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33395049616847705, "compression/movement_sparsity/importance_threshold": -0.004737675504757332, "compression/movement_sparsity/linear_layer_sparsity": 0.2120556649717178, "compression/movement_sparsity/model_sparsity": 0.2047709005204098, "compression_loss": 35.89946365356445, "distillation_loss": 0.6715864539146423, "epoch": 2.38, "learning_rate": 4.232647694186156e-05, "loss": 36.4041, "step": 2817, "task_loss": 0.4782352149486542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3345944369912549, "compression/movement_sparsity/importance_threshold": -0.00473309509047125, "compression/movement_sparsity/linear_layer_sparsity": 0.2128814135805013, "compression/movement_sparsity/model_sparsity": 0.20556828212418177, "compression_loss": 35.96826171875, "distillation_loss": 0.4472971558570862, "epoch": 2.38, "learning_rate": 4.232178078331925e-05, "loss": 36.4993, "step": 2818, "task_loss": 0.06616273522377014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33523796263621175, "compression/movement_sparsity/importance_threshold": -0.0047285176293857655, "compression/movement_sparsity/linear_layer_sparsity": 0.21357303915171663, "compression/movement_sparsity/model_sparsity": 0.20623614822933672, "compression_loss": 36.037044525146484, "distillation_loss": 0.5804795622825623, "epoch": 2.38, "learning_rate": 4.2317084624776934e-05, "loss": 36.5342, "step": 2819, "task_loss": 0.3965770900249481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3358810732372326, "compression/movement_sparsity/importance_threshold": -0.004723943120548541, "compression/movement_sparsity/linear_layer_sparsity": 0.2144157677752136, "compression/movement_sparsity/model_sparsity": 0.2070499265320801, "compression_loss": 36.10574722290039, "distillation_loss": 0.5074720978736877, "epoch": 2.38, "learning_rate": 4.231238846623462e-05, "loss": 36.9602, "step": 2820, "task_loss": 0.9928963780403137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3365237689282028, "compression/movement_sparsity/importance_threshold": -0.004719371563007237, "compression/movement_sparsity/linear_layer_sparsity": 0.21517605270367623, "compression/movement_sparsity/model_sparsity": 0.20778409333434, "compression_loss": 36.17441940307617, "distillation_loss": 0.5468608140945435, "epoch": 2.38, "learning_rate": 4.230769230769231e-05, "loss": 36.8514, "step": 2821, "task_loss": 0.6402634382247925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33716604984300724, "compression/movement_sparsity/importance_threshold": -0.004714802955809517, "compression/movement_sparsity/linear_layer_sparsity": 0.2160370968486619, "compression/movement_sparsity/model_sparsity": 0.2086155579640638, "compression_loss": 36.2430419921875, "distillation_loss": 0.7278431057929993, "epoch": 2.39, "learning_rate": 4.230299614915e-05, "loss": 36.9288, "step": 2822, "task_loss": 0.5286237001419067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3378079161155312, "compression/movement_sparsity/importance_threshold": -0.004710237298003043, "compression/movement_sparsity/linear_layer_sparsity": 0.2166386472575559, "compression/movement_sparsity/model_sparsity": 0.2091964432658268, "compression_loss": 36.31161880493164, "distillation_loss": 0.7924224138259888, "epoch": 2.39, "learning_rate": 4.2298299990607687e-05, "loss": 37.0103, "step": 2823, "task_loss": 1.0442495346069336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33844936787965985, "compression/movement_sparsity/importance_threshold": -0.004705674588635475, "compression/movement_sparsity/linear_layer_sparsity": 0.21736581877249375, "compression/movement_sparsity/model_sparsity": 0.20989863420218527, "compression_loss": 36.380165100097656, "distillation_loss": 0.8541896939277649, "epoch": 2.39, "learning_rate": 4.229360383206537e-05, "loss": 37.2201, "step": 2824, "task_loss": 0.7706206440925598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33909040526927825, "compression/movement_sparsity/importance_threshold": -0.004701114826754478, "compression/movement_sparsity/linear_layer_sparsity": 0.2181137502632856, "compression/movement_sparsity/model_sparsity": 0.21062087194536205, "compression_loss": 36.44866180419922, "distillation_loss": 0.3980153799057007, "epoch": 2.39, "learning_rate": 4.228890767352306e-05, "loss": 36.9727, "step": 2825, "task_loss": 0.5206355452537537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3397310284182715, "compression/movement_sparsity/importance_threshold": -0.0046965580114077116, "compression/movement_sparsity/linear_layer_sparsity": 0.2189440419799384, "compression/movement_sparsity/model_sparsity": 0.21142264058727173, "compression_loss": 36.517120361328125, "distillation_loss": 0.5039831399917603, "epoch": 2.39, "learning_rate": 4.2284211514980746e-05, "loss": 37.1221, "step": 2826, "task_loss": 0.1292082518339157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34037123746052467, "compression/movement_sparsity/importance_threshold": -0.004692004141642838, "compression/movement_sparsity/linear_layer_sparsity": 0.2198297333794274, "compression/movement_sparsity/model_sparsity": 0.21227790576248284, "compression_loss": 36.585487365722656, "distillation_loss": 0.7380480170249939, "epoch": 2.39, "learning_rate": 4.227951535643844e-05, "loss": 37.2617, "step": 2827, "task_loss": 0.31648674607276917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34101103252992326, "compression/movement_sparsity/importance_threshold": -0.004687453216507518, "compression/movement_sparsity/linear_layer_sparsity": 0.22062362061228788, "compression/movement_sparsity/model_sparsity": 0.21304452052661177, "compression_loss": 36.6538200378418, "distillation_loss": 0.5199155807495117, "epoch": 2.39, "learning_rate": 4.227481919789612e-05, "loss": 37.1479, "step": 2828, "task_loss": 0.2934201657772064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3416504137603522, "compression/movement_sparsity/importance_threshold": -0.004682905235049415, "compression/movement_sparsity/linear_layer_sparsity": 0.2213713374680623, "compression/movement_sparsity/model_sparsity": 0.21376655100814423, "compression_loss": 36.72210693359375, "distillation_loss": 0.8593736290931702, "epoch": 2.39, "learning_rate": 4.227012303935381e-05, "loss": 37.5543, "step": 2829, "task_loss": 1.9251400232315063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34228938128569664, "compression/movement_sparsity/importance_threshold": -0.00467836019631619, "compression/movement_sparsity/linear_layer_sparsity": 0.22210568733191693, "compression/movement_sparsity/model_sparsity": 0.21447567369505105, "compression_loss": 36.79035949707031, "distillation_loss": 0.8809927105903625, "epoch": 2.39, "learning_rate": 4.22654268808115e-05, "loss": 37.4862, "step": 2830, "task_loss": 0.9311612844467163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3429279352398418, "compression/movement_sparsity/importance_threshold": -0.0046738180993555044, "compression/movement_sparsity/linear_layer_sparsity": 0.22287076577576917, "compression/movement_sparsity/model_sparsity": 0.21521446934070038, "compression_loss": 36.85855484008789, "distillation_loss": 0.47958043217658997, "epoch": 2.39, "learning_rate": 4.2260730722269184e-05, "loss": 37.383, "step": 2831, "task_loss": 0.03479837626218796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34356607575667253, "compression/movement_sparsity/importance_threshold": -0.0046692789432150215, "compression/movement_sparsity/linear_layer_sparsity": 0.2235902462025819, "compression/movement_sparsity/model_sparsity": 0.21590923340147136, "compression_loss": 36.926692962646484, "distillation_loss": 0.6977794170379639, "epoch": 2.39, "learning_rate": 4.225603456372688e-05, "loss": 37.7612, "step": 2832, "task_loss": 0.7564378976821899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34420380297007425, "compression/movement_sparsity/importance_threshold": -0.004664742726942402, "compression/movement_sparsity/linear_layer_sparsity": 0.2244419298759734, "compression/movement_sparsity/model_sparsity": 0.21673165912059644, "compression_loss": 36.994781494140625, "distillation_loss": 0.5316818952560425, "epoch": 2.39, "learning_rate": 4.225133840518456e-05, "loss": 37.6493, "step": 2833, "task_loss": 1.0452696084976196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34484111701393205, "compression/movement_sparsity/importance_threshold": -0.0046602094495853084, "compression/movement_sparsity/linear_layer_sparsity": 0.22528922545567492, "compression/movement_sparsity/model_sparsity": 0.21754984749054912, "compression_loss": 37.06282043457031, "distillation_loss": 0.6170206665992737, "epoch": 2.4, "learning_rate": 4.224664224664225e-05, "loss": 37.6723, "step": 2834, "task_loss": 0.8176475763320923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3454780180221311, "compression/movement_sparsity/importance_threshold": -0.004655679110191402, "compression/movement_sparsity/linear_layer_sparsity": 0.22615468154268584, "compression/movement_sparsity/model_sparsity": 0.2183855724985169, "compression_loss": 37.13083267211914, "distillation_loss": 0.3944639563560486, "epoch": 2.4, "learning_rate": 4.2241946088099936e-05, "loss": 37.6166, "step": 2835, "task_loss": 0.39975884556770325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3461145061285561, "compression/movement_sparsity/importance_threshold": -0.004651151707808348, "compression/movement_sparsity/linear_layer_sparsity": 0.22700638906441264, "compression/movement_sparsity/model_sparsity": 0.21920802124671357, "compression_loss": 37.198795318603516, "distillation_loss": 0.5506384968757629, "epoch": 2.4, "learning_rate": 4.223724992955763e-05, "loss": 37.7918, "step": 2836, "task_loss": 1.3701025247573853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34675058146709314, "compression/movement_sparsity/importance_threshold": -0.0046466272414838, "compression/movement_sparsity/linear_layer_sparsity": 0.22775130374079264, "compression/movement_sparsity/model_sparsity": 0.21992734581233433, "compression_loss": 37.266700744628906, "distillation_loss": 0.5260502099990845, "epoch": 2.4, "learning_rate": 4.223255377101531e-05, "loss": 37.8481, "step": 2837, "task_loss": 0.765106201171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34738624417162633, "compression/movement_sparsity/importance_threshold": -0.004642105710265429, "compression/movement_sparsity/linear_layer_sparsity": 0.22853890693730902, "compression/movement_sparsity/model_sparsity": 0.22068789241609954, "compression_loss": 37.334590911865234, "distillation_loss": 0.517400860786438, "epoch": 2.4, "learning_rate": 4.2227857612472995e-05, "loss": 38.1132, "step": 2838, "task_loss": 0.49602624773979187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34802149437604146, "compression/movement_sparsity/importance_threshold": -0.004637587113200892, "compression/movement_sparsity/linear_layer_sparsity": 0.22943069158645996, "compression/movement_sparsity/model_sparsity": 0.22154904151910165, "compression_loss": 37.40242004394531, "distillation_loss": 0.8058321475982666, "epoch": 2.4, "learning_rate": 4.222316145393069e-05, "loss": 38.1899, "step": 2839, "task_loss": 0.9105251431465149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34865633221422343, "compression/movement_sparsity/importance_threshold": -0.0046330714493378515, "compression/movement_sparsity/linear_layer_sparsity": 0.23005344316541051, "compression/movement_sparsity/model_sparsity": 0.22215039966550734, "compression_loss": 37.4702033996582, "distillation_loss": 0.8100709319114685, "epoch": 2.4, "learning_rate": 4.2218465295388375e-05, "loss": 38.1504, "step": 2840, "task_loss": 1.3120280504226685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3492907578200576, "compression/movement_sparsity/importance_threshold": -0.004628558717723969, "compression/movement_sparsity/linear_layer_sparsity": 0.23077462874819515, "compression/movement_sparsity/model_sparsity": 0.22284681030489698, "compression_loss": 37.53794479370117, "distillation_loss": 0.6628468036651611, "epoch": 2.4, "learning_rate": 4.221376913684606e-05, "loss": 38.3271, "step": 2841, "task_loss": 0.49185076355934143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34992477132742883, "compression/movement_sparsity/importance_threshold": -0.004624048917406908, "compression/movement_sparsity/linear_layer_sparsity": 0.23164636887155018, "compression/movement_sparsity/model_sparsity": 0.22368860347322847, "compression_loss": 37.60566711425781, "distillation_loss": 1.11928391456604, "epoch": 2.4, "learning_rate": 4.220907297830375e-05, "loss": 38.3894, "step": 2842, "task_loss": 0.7628251910209656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3505583728702224, "compression/movement_sparsity/importance_threshold": -0.004619542047434328, "compression/movement_sparsity/linear_layer_sparsity": 0.2323992369677433, "compression/movement_sparsity/model_sparsity": 0.22441560823422418, "compression_loss": 37.67332458496094, "distillation_loss": 0.7086113691329956, "epoch": 2.4, "learning_rate": 4.220437681976144e-05, "loss": 38.3376, "step": 2843, "task_loss": 0.5828056335449219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3511915625823233, "compression/movement_sparsity/importance_threshold": -0.004615038106853894, "compression/movement_sparsity/linear_layer_sparsity": 0.23332476701130372, "compression/movement_sparsity/model_sparsity": 0.22530934347352485, "compression_loss": 37.740943908691406, "distillation_loss": 0.48630592226982117, "epoch": 2.4, "learning_rate": 4.219968066121913e-05, "loss": 38.4735, "step": 2844, "task_loss": 0.3841204047203064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.351824340597617, "compression/movement_sparsity/importance_threshold": -0.004610537094713265, "compression/movement_sparsity/linear_layer_sparsity": 0.2341131333545488, "compression/movement_sparsity/model_sparsity": 0.2260706270075809, "compression_loss": 37.808536529541016, "distillation_loss": 0.6768912076950073, "epoch": 2.4, "learning_rate": 4.219498450267681e-05, "loss": 38.4119, "step": 2845, "task_loss": 0.42243510484695435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3524567070499883, "compression/movement_sparsity/importance_threshold": -0.004606039010060105, "compression/movement_sparsity/linear_layer_sparsity": 0.23494972103171333, "compression/movement_sparsity/model_sparsity": 0.2268784753243901, "compression_loss": 37.87609100341797, "distillation_loss": 0.529747724533081, "epoch": 2.41, "learning_rate": 4.21902883441345e-05, "loss": 38.4299, "step": 2846, "task_loss": 0.25175872445106506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35308866207332235, "compression/movement_sparsity/importance_threshold": -0.004601543851942075, "compression/movement_sparsity/linear_layer_sparsity": 0.23596303679740896, "compression/movement_sparsity/model_sparsity": 0.22785698057621018, "compression_loss": 37.94355010986328, "distillation_loss": 0.322802871465683, "epoch": 2.41, "learning_rate": 4.2185592185592186e-05, "loss": 38.5195, "step": 2847, "task_loss": 0.24807630479335785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35372020580150454, "compression/movement_sparsity/importance_threshold": -0.004597051619406835, "compression/movement_sparsity/linear_layer_sparsity": 0.2366913053357693, "compression/movement_sparsity/model_sparsity": 0.22856023084986174, "compression_loss": 38.01099395751953, "distillation_loss": 1.2113771438598633, "epoch": 2.41, "learning_rate": 4.218089602704988e-05, "loss": 38.9945, "step": 2848, "task_loss": 0.618395209312439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3543513383684199, "compression/movement_sparsity/importance_threshold": -0.0045925623115020495, "compression/movement_sparsity/linear_layer_sparsity": 0.23755233755658733, "compression/movement_sparsity/model_sparsity": 0.22939168396504975, "compression_loss": 38.078407287597656, "distillation_loss": 0.5787268280982971, "epoch": 2.41, "learning_rate": 4.2176199868507565e-05, "loss": 38.8266, "step": 2849, "task_loss": 0.8007813096046448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3549820599079536, "compression/movement_sparsity/importance_threshold": -0.0045880759272753785, "compression/movement_sparsity/linear_layer_sparsity": 0.2383614757998541, "compression/movement_sparsity/model_sparsity": 0.2301730258204599, "compression_loss": 38.1457405090332, "distillation_loss": 0.6601641178131104, "epoch": 2.41, "learning_rate": 4.217150370996525e-05, "loss": 38.8446, "step": 2850, "task_loss": 0.6490051746368408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35561237055399075, "compression/movement_sparsity/importance_threshold": -0.004583592465774485, "compression/movement_sparsity/linear_layer_sparsity": 0.23911697913709473, "compression/movement_sparsity/model_sparsity": 0.2309025752938662, "compression_loss": 38.21304702758789, "distillation_loss": 0.6918537616729736, "epoch": 2.41, "learning_rate": 4.216680755142294e-05, "loss": 38.724, "step": 2851, "task_loss": 0.4863254129886627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35624227044041656, "compression/movement_sparsity/importance_threshold": -0.004579111926047029, "compression/movement_sparsity/linear_layer_sparsity": 0.24006387728905862, "compression/movement_sparsity/model_sparsity": 0.2318169445813107, "compression_loss": 38.280311584472656, "distillation_loss": 0.504257082939148, "epoch": 2.41, "learning_rate": 4.2162111392880624e-05, "loss": 38.8444, "step": 2852, "task_loss": 0.5489974617958069 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35687175970111595, "compression/movement_sparsity/importance_threshold": -0.004574634307140677, "compression/movement_sparsity/linear_layer_sparsity": 0.2409157517491323, "compression/movement_sparsity/model_sparsity": 0.2326395545330085, "compression_loss": 38.347530364990234, "distillation_loss": 0.7392163276672363, "epoch": 2.41, "learning_rate": 4.215741523433832e-05, "loss": 38.9946, "step": 2853, "task_loss": 0.6113112568855286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35750083846997427, "compression/movement_sparsity/importance_threshold": -0.004570159608103085, "compression/movement_sparsity/linear_layer_sparsity": 0.2417975916424749, "compression/movement_sparsity/model_sparsity": 0.23349110051315797, "compression_loss": 38.41470718383789, "distillation_loss": 0.586274266242981, "epoch": 2.41, "learning_rate": 4.2152719075796e-05, "loss": 39.0738, "step": 2854, "task_loss": 0.5201640129089355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35812950688087664, "compression/movement_sparsity/importance_threshold": -0.004565687827981918, "compression/movement_sparsity/linear_layer_sparsity": 0.24258228534208814, "compression/movement_sparsity/model_sparsity": 0.2342488375701893, "compression_loss": 38.481788635253906, "distillation_loss": 0.9122616052627563, "epoch": 2.41, "learning_rate": 4.214802291725369e-05, "loss": 39.3387, "step": 2855, "task_loss": 1.3775690793991089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35875776506770773, "compression/movement_sparsity/importance_threshold": -0.004561218965824841, "compression/movement_sparsity/linear_layer_sparsity": 0.24344795606411654, "compression/movement_sparsity/model_sparsity": 0.23508476983980137, "compression_loss": 38.54889678955078, "distillation_loss": 0.29219433665275574, "epoch": 2.41, "learning_rate": 4.2143326758711376e-05, "loss": 39.2171, "step": 2856, "task_loss": 0.06765390932559967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3593856131643538, "compression/movement_sparsity/importance_threshold": -0.004556753020679507, "compression/movement_sparsity/linear_layer_sparsity": 0.24435515866202062, "compression/movement_sparsity/model_sparsity": 0.23596080723758583, "compression_loss": 38.61592102050781, "distillation_loss": 1.30129075050354, "epoch": 2.41, "learning_rate": 4.213863060016906e-05, "loss": 39.4551, "step": 2857, "task_loss": 1.2454878091812134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36001305130469874, "compression/movement_sparsity/importance_threshold": -0.004552289991593587, "compression/movement_sparsity/linear_layer_sparsity": 0.24511091240678162, "compression/movement_sparsity/model_sparsity": 0.23669059851624383, "compression_loss": 38.682899475097656, "distillation_loss": 0.5598212480545044, "epoch": 2.42, "learning_rate": 4.213393444162675e-05, "loss": 39.3982, "step": 2858, "task_loss": 0.6762844324111938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36064007962262845, "compression/movement_sparsity/importance_threshold": -0.004547829877614738, "compression/movement_sparsity/linear_layer_sparsity": 0.24593354880781218, "compression/movement_sparsity/model_sparsity": 0.2374849748261734, "compression_loss": 38.749881744384766, "distillation_loss": 0.526597261428833, "epoch": 2.42, "learning_rate": 4.2129238283084435e-05, "loss": 39.484, "step": 2859, "task_loss": 0.6439964175224304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36126669825202784, "compression/movement_sparsity/importance_threshold": -0.004543372677790622, "compression/movement_sparsity/linear_layer_sparsity": 0.2467699695466298, "compression/movement_sparsity/model_sparsity": 0.23829266193948145, "compression_loss": 38.8167839050293, "distillation_loss": 0.4770747721195221, "epoch": 2.42, "learning_rate": 4.212454212454213e-05, "loss": 39.521, "step": 2860, "task_loss": 0.8384709358215332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36189290732678214, "compression/movement_sparsity/importance_threshold": -0.004538918391168901, "compression/movement_sparsity/linear_layer_sparsity": 0.247527583461542, "compression/movement_sparsity/model_sparsity": 0.2390242494857234, "compression_loss": 38.88365173339844, "distillation_loss": 1.1175765991210938, "epoch": 2.42, "learning_rate": 4.2119845965999815e-05, "loss": 39.6421, "step": 2861, "task_loss": 1.5812039375305176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3625187069807764, "compression/movement_sparsity/importance_threshold": -0.004534467016797239, "compression/movement_sparsity/linear_layer_sparsity": 0.24817495844666063, "compression/movement_sparsity/model_sparsity": 0.23964938514854484, "compression_loss": 38.95047378540039, "distillation_loss": 1.001965880393982, "epoch": 2.42, "learning_rate": 4.21151498074575e-05, "loss": 39.7305, "step": 2862, "task_loss": 1.1766036748886108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36314409734789577, "compression/movement_sparsity/importance_threshold": -0.004530018553723296, "compression/movement_sparsity/linear_layer_sparsity": 0.248952247238172, "compression/movement_sparsity/model_sparsity": 0.24039997167884775, "compression_loss": 39.01725387573242, "distillation_loss": 0.7553994655609131, "epoch": 2.42, "learning_rate": 4.211045364891519e-05, "loss": 39.81, "step": 2863, "task_loss": 0.47842174768447876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36376907856202545, "compression/movement_sparsity/importance_threshold": -0.0045255730009947336, "compression/movement_sparsity/linear_layer_sparsity": 0.24971542973937014, "compression/movement_sparsity/model_sparsity": 0.24113693651330576, "compression_loss": 39.083984375, "distillation_loss": 0.6600121259689331, "epoch": 2.42, "learning_rate": 4.2105757490372874e-05, "loss": 39.6071, "step": 2864, "task_loss": 0.2847423553466797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36439365075705055, "compression/movement_sparsity/importance_threshold": -0.004521130357659215, "compression/movement_sparsity/linear_layer_sparsity": 0.2504270759880461, "compression/movement_sparsity/model_sparsity": 0.24182413552405974, "compression_loss": 39.15066909790039, "distillation_loss": 0.4185677170753479, "epoch": 2.42, "learning_rate": 4.210106133183057e-05, "loss": 39.8543, "step": 2865, "task_loss": 0.4007049798965454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36501781406685607, "compression/movement_sparsity/importance_threshold": -0.004516690622764401, "compression/movement_sparsity/linear_layer_sparsity": 0.2513280064737737, "compression/movement_sparsity/model_sparsity": 0.24269411627601628, "compression_loss": 39.217288970947266, "distillation_loss": 0.5565626621246338, "epoch": 2.42, "learning_rate": 4.209636517328825e-05, "loss": 39.8695, "step": 2866, "task_loss": 1.560595989227295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36564156862532715, "compression/movement_sparsity/importance_threshold": -0.004512253795357956, "compression/movement_sparsity/linear_layer_sparsity": 0.2521967417068845, "compression/movement_sparsity/model_sparsity": 0.24353300778132755, "compression_loss": 39.28391647338867, "distillation_loss": 0.3029848635196686, "epoch": 2.42, "learning_rate": 4.209166901474594e-05, "loss": 39.8357, "step": 2867, "task_loss": 0.08451610803604126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36626491456634935, "compression/movement_sparsity/importance_threshold": -0.004507819874487537, "compression/movement_sparsity/linear_layer_sparsity": 0.25305207417557257, "compression/movement_sparsity/model_sparsity": 0.24435895694840576, "compression_loss": 39.3504638671875, "distillation_loss": 0.7819445133209229, "epoch": 2.42, "learning_rate": 4.2086972856203626e-05, "loss": 40.1382, "step": 2868, "task_loss": 0.34393346309661865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36688785202380747, "compression/movement_sparsity/importance_threshold": -0.004503388859200808, "compression/movement_sparsity/linear_layer_sparsity": 0.25381237102820287, "compression/movement_sparsity/model_sparsity": 0.24509313526520146, "compression_loss": 39.41695785522461, "distillation_loss": 0.4396437108516693, "epoch": 2.42, "learning_rate": 4.208227669766132e-05, "loss": 40.1237, "step": 2869, "task_loss": 0.7038557529449463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3675103811315866, "compression/movement_sparsity/importance_threshold": -0.004498960748545434, "compression/movement_sparsity/linear_layer_sparsity": 0.2544368516114606, "compression/movement_sparsity/model_sparsity": 0.24569616301929736, "compression_loss": 39.48345184326172, "distillation_loss": 0.6679707765579224, "epoch": 2.43, "learning_rate": 4.2077580539119006e-05, "loss": 40.1362, "step": 2870, "task_loss": 0.29178306460380554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3681325020235722, "compression/movement_sparsity/importance_threshold": -0.004494535541569072, "compression/movement_sparsity/linear_layer_sparsity": 0.25508440545909383, "compression/movement_sparsity/model_sparsity": 0.2463214714001557, "compression_loss": 39.54984664916992, "distillation_loss": 0.8402762413024902, "epoch": 2.43, "learning_rate": 4.2072884380576685e-05, "loss": 40.4846, "step": 2871, "task_loss": 0.49126118421554565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.368754214833649, "compression/movement_sparsity/importance_threshold": -0.004490113237319386, "compression/movement_sparsity/linear_layer_sparsity": 0.2558767187018264, "compression/movement_sparsity/model_sparsity": 0.24708656624555975, "compression_loss": 39.61621856689453, "distillation_loss": 1.2904667854309082, "epoch": 2.43, "learning_rate": 4.206818822203438e-05, "loss": 40.4715, "step": 2872, "task_loss": 0.8943792581558228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36937551969570237, "compression/movement_sparsity/importance_threshold": -0.004485693834844038, "compression/movement_sparsity/linear_layer_sparsity": 0.25660348479506456, "compression/movement_sparsity/model_sparsity": 0.2477883656877012, "compression_loss": 39.68254089355469, "distillation_loss": 0.6103184223175049, "epoch": 2.43, "learning_rate": 4.2063492063492065e-05, "loss": 40.191, "step": 2873, "task_loss": 0.26382020115852356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3699964167436175, "compression/movement_sparsity/importance_threshold": -0.004481277333190691, "compression/movement_sparsity/linear_layer_sparsity": 0.2574052181302295, "compression/movement_sparsity/model_sparsity": 0.24856255701638297, "compression_loss": 39.74880599975586, "distillation_loss": 0.30827316641807556, "epoch": 2.43, "learning_rate": 4.205879590494976e-05, "loss": 40.3385, "step": 2874, "task_loss": 0.41729119420051575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3706169061112794, "compression/movement_sparsity/importance_threshold": -0.004476863731407005, "compression/movement_sparsity/linear_layer_sparsity": 0.2582361179794317, "compression/movement_sparsity/model_sparsity": 0.24936491289961815, "compression_loss": 39.81502914428711, "distillation_loss": 0.8771607875823975, "epoch": 2.43, "learning_rate": 4.205409974640744e-05, "loss": 40.6446, "step": 2875, "task_loss": 1.4221326112747192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.371236987932573, "compression/movement_sparsity/importance_threshold": -0.004472453028540643, "compression/movement_sparsity/linear_layer_sparsity": 0.2590391510488689, "compression/movement_sparsity/model_sparsity": 0.2501403593127015, "compression_loss": 39.881221771240234, "distillation_loss": 0.883097767829895, "epoch": 2.43, "learning_rate": 4.204940358786513e-05, "loss": 40.6208, "step": 2876, "task_loss": 1.148726224899292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37185666234138415, "compression/movement_sparsity/importance_threshold": -0.004468045223639265, "compression/movement_sparsity/linear_layer_sparsity": 0.2598330025092264, "compression/movement_sparsity/model_sparsity": 0.2509069395332231, "compression_loss": 39.9473991394043, "distillation_loss": 0.7720258831977844, "epoch": 2.43, "learning_rate": 4.204470742932282e-05, "loss": 40.5652, "step": 2877, "task_loss": 1.0045181512832642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37247592947159713, "compression/movement_sparsity/importance_threshold": -0.004463640315750536, "compression/movement_sparsity/linear_layer_sparsity": 0.2607277920486216, "compression/movement_sparsity/model_sparsity": 0.2517709902992454, "compression_loss": 40.01347351074219, "distillation_loss": 0.5860681533813477, "epoch": 2.43, "learning_rate": 4.20400112707805e-05, "loss": 40.76, "step": 2878, "task_loss": 0.4269542694091797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3730947894570976, "compression/movement_sparsity/importance_threshold": -0.004459238303922115, "compression/movement_sparsity/linear_layer_sparsity": 0.2616439735447555, "compression/movement_sparsity/model_sparsity": 0.2526556981424831, "compression_loss": 40.07955551147461, "distillation_loss": 0.5178368091583252, "epoch": 2.43, "learning_rate": 4.2035315112238196e-05, "loss": 40.656, "step": 2879, "task_loss": 0.46288740634918213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37371324243177073, "compression/movement_sparsity/importance_threshold": -0.004454839187201664, "compression/movement_sparsity/linear_layer_sparsity": 0.26239698473096024, "compression/movement_sparsity/model_sparsity": 0.25338284107790837, "compression_loss": 40.14555358886719, "distillation_loss": 1.135816216468811, "epoch": 2.43, "learning_rate": 4.2030618953695876e-05, "loss": 40.9836, "step": 2880, "task_loss": 0.5363074541091919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3743312885295015, "compression/movement_sparsity/importance_threshold": -0.004450442964636847, "compression/movement_sparsity/linear_layer_sparsity": 0.26329505341645526, "compression/movement_sparsity/model_sparsity": 0.25425005834127423, "compression_loss": 40.211490631103516, "distillation_loss": 0.6378007531166077, "epoch": 2.44, "learning_rate": 4.202592279515357e-05, "loss": 40.7312, "step": 2881, "task_loss": 0.2139068841934204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.374948927884175, "compression/movement_sparsity/importance_threshold": -0.004446049635275323, "compression/movement_sparsity/linear_layer_sparsity": 0.2640933048946705, "compression/movement_sparsity/model_sparsity": 0.255020887425504, "compression_loss": 40.27735137939453, "distillation_loss": 1.3497295379638672, "epoch": 2.44, "learning_rate": 4.2021226636611255e-05, "loss": 41.0884, "step": 2882, "task_loss": 1.2332146167755127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3755661606296764, "compression/movement_sparsity/importance_threshold": -0.004441659198164758, "compression/movement_sparsity/linear_layer_sparsity": 0.2649788055074773, "compression/movement_sparsity/model_sparsity": 0.25587596836814236, "compression_loss": 40.34318542480469, "distillation_loss": 0.41945314407348633, "epoch": 2.44, "learning_rate": 4.201653047806894e-05, "loss": 40.9721, "step": 2883, "task_loss": 0.22237937152385712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37618298689989105, "compression/movement_sparsity/importance_threshold": -0.0044372716523528085, "compression/movement_sparsity/linear_layer_sparsity": 0.26584751689225283, "compression/movement_sparsity/model_sparsity": 0.25671483684438207, "compression_loss": 40.40897750854492, "distillation_loss": 0.38752445578575134, "epoch": 2.44, "learning_rate": 4.201183431952663e-05, "loss": 40.8997, "step": 2884, "task_loss": 0.06671662628650665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37679940682870383, "compression/movement_sparsity/importance_threshold": -0.004432886996887139, "compression/movement_sparsity/linear_layer_sparsity": 0.2668503632387642, "compression/movement_sparsity/model_sparsity": 0.25768323233377455, "compression_loss": 40.47468185424805, "distillation_loss": 0.8969423174858093, "epoch": 2.44, "learning_rate": 4.2007138160984314e-05, "loss": 41.1981, "step": 2885, "task_loss": 1.138606309890747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3774154205499999, "compression/movement_sparsity/importance_threshold": -0.004428505230815415, "compression/movement_sparsity/linear_layer_sparsity": 0.26761257988238385, "compression/movement_sparsity/model_sparsity": 0.25841926449083313, "compression_loss": 40.54033660888672, "distillation_loss": 1.033566951751709, "epoch": 2.44, "learning_rate": 4.200244200244201e-05, "loss": 41.5904, "step": 2886, "task_loss": 1.3841004371643066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3780310281976643, "compression/movement_sparsity/importance_threshold": -0.004424126353185293, "compression/movement_sparsity/linear_layer_sparsity": 0.2686020353886401, "compression/movement_sparsity/model_sparsity": 0.25937472915652837, "compression_loss": 40.60597229003906, "distillation_loss": 0.24756751954555511, "epoch": 2.44, "learning_rate": 4.1997745843899694e-05, "loss": 41.2964, "step": 2887, "task_loss": 0.19874149560928345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3786462299055826, "compression/movement_sparsity/importance_threshold": -0.004419750363044437, "compression/movement_sparsity/linear_layer_sparsity": 0.269487762560632, "compression/movement_sparsity/model_sparsity": 0.26023002887534685, "compression_loss": 40.67157745361328, "distillation_loss": 1.0322494506835938, "epoch": 2.44, "learning_rate": 4.199304968535738e-05, "loss": 41.564, "step": 2888, "task_loss": 0.7601196765899658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37926102580763965, "compression/movement_sparsity/importance_threshold": -0.004415377259440507, "compression/movement_sparsity/linear_layer_sparsity": 0.270255893591399, "compression/movement_sparsity/model_sparsity": 0.26097177224215956, "compression_loss": 40.73715591430664, "distillation_loss": 0.5347920656204224, "epoch": 2.44, "learning_rate": 4.1988353526815066e-05, "loss": 41.5197, "step": 2889, "task_loss": 0.8673065900802612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3798754160377207, "compression/movement_sparsity/importance_threshold": -0.004411007041421167, "compression/movement_sparsity/linear_layer_sparsity": 0.2712093142630597, "compression/movement_sparsity/model_sparsity": 0.2618924399806837, "compression_loss": 40.80265426635742, "distillation_loss": 0.6879467964172363, "epoch": 2.44, "learning_rate": 4.198365736827275e-05, "loss": 41.8017, "step": 2890, "task_loss": 0.4972626864910126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3804894007297107, "compression/movement_sparsity/importance_threshold": -0.004406639708034078, "compression/movement_sparsity/linear_layer_sparsity": 0.27218634478663944, "compression/movement_sparsity/model_sparsity": 0.262835906500081, "compression_loss": 40.86815643310547, "distillation_loss": 0.988438606262207, "epoch": 2.44, "learning_rate": 4.1978961209730446e-05, "loss": 41.6642, "step": 2891, "task_loss": 0.35966598987579346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.381102980017495, "compression/movement_sparsity/importance_threshold": -0.004402275258326902, "compression/movement_sparsity/linear_layer_sparsity": 0.2728552905910114, "compression/movement_sparsity/model_sparsity": 0.2634818719581547, "compression_loss": 40.93360900878906, "distillation_loss": 0.6301605701446533, "epoch": 2.44, "learning_rate": 4.1974265051188125e-05, "loss": 41.817, "step": 2892, "task_loss": 0.5049206018447876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3817161540349585, "compression/movement_sparsity/importance_threshold": -0.004397913691347302, "compression/movement_sparsity/linear_layer_sparsity": 0.27372834237280635, "compression/movement_sparsity/model_sparsity": 0.2643249317254236, "compression_loss": 40.999027252197266, "distillation_loss": 0.5377941131591797, "epoch": 2.45, "learning_rate": 4.196956889264582e-05, "loss": 41.6869, "step": 2893, "task_loss": 0.471804678440094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38232892291598664, "compression/movement_sparsity/importance_threshold": -0.004393555006142939, "compression/movement_sparsity/linear_layer_sparsity": 0.2746532285113144, "compression/movement_sparsity/model_sparsity": 0.26521804517979136, "compression_loss": 41.064422607421875, "distillation_loss": 0.49639832973480225, "epoch": 2.45, "learning_rate": 4.1964872734103505e-05, "loss": 41.6056, "step": 2894, "task_loss": 0.9741214513778687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3829412867944644, "compression/movement_sparsity/importance_threshold": -0.004389199201761474, "compression/movement_sparsity/linear_layer_sparsity": 0.2753464399968253, "compression/movement_sparsity/model_sparsity": 0.26588744271820697, "compression_loss": 41.12973403930664, "distillation_loss": 1.1778508424758911, "epoch": 2.45, "learning_rate": 4.196017657556119e-05, "loss": 41.9804, "step": 2895, "task_loss": 0.5483050346374512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38355324580427663, "compression/movement_sparsity/importance_threshold": -0.00438484627725057, "compression/movement_sparsity/linear_layer_sparsity": 0.27626872666678876, "compression/movement_sparsity/model_sparsity": 0.26677804600377153, "compression_loss": 41.195037841796875, "distillation_loss": 0.4962417483329773, "epoch": 2.45, "learning_rate": 4.1955480417018884e-05, "loss": 42.0754, "step": 2896, "task_loss": 0.5418307781219482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3841648000793093, "compression/movement_sparsity/importance_threshold": -0.004380496231657886, "compression/movement_sparsity/linear_layer_sparsity": 0.2770108630121096, "compression/movement_sparsity/model_sparsity": 0.2674946876825522, "compression_loss": 41.26029968261719, "distillation_loss": 0.57588130235672, "epoch": 2.45, "learning_rate": 4.1950784258476564e-05, "loss": 42.0877, "step": 2897, "task_loss": 0.9692898988723755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3847759497534464, "compression/movement_sparsity/importance_threshold": -0.0043761490640310905, "compression/movement_sparsity/linear_layer_sparsity": 0.27781298984480646, "compression/movement_sparsity/model_sparsity": 0.26826925899091514, "compression_loss": 41.32549285888672, "distillation_loss": 0.8748171925544739, "epoch": 2.45, "learning_rate": 4.194608809993426e-05, "loss": 42.1234, "step": 2898, "task_loss": 1.0364279747009277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3853866949605741, "compression/movement_sparsity/importance_threshold": -0.0043718047734178376, "compression/movement_sparsity/linear_layer_sparsity": 0.2786048261208336, "compression/movement_sparsity/model_sparsity": 0.2690338932548874, "compression_loss": 41.39067840576172, "distillation_loss": 1.0715327262878418, "epoch": 2.45, "learning_rate": 4.194139194139194e-05, "loss": 42.1918, "step": 2899, "task_loss": 0.23103930056095123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.385997035834577, "compression/movement_sparsity/importance_threshold": -0.004367463358865793, "compression/movement_sparsity/linear_layer_sparsity": 0.27931671085286225, "compression/movement_sparsity/model_sparsity": 0.2697213225563573, "compression_loss": 41.455814361572266, "distillation_loss": 0.9624485373497009, "epoch": 2.45, "learning_rate": 4.1936695782849636e-05, "loss": 42.3738, "step": 2900, "task_loss": 1.2449663877487183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3866069725093405, "compression/movement_sparsity/importance_threshold": -0.004363124819422619, "compression/movement_sparsity/linear_layer_sparsity": 0.2802036781382883, "compression/movement_sparsity/model_sparsity": 0.27057781978689843, "compression_loss": 41.520931243896484, "distillation_loss": 0.6062524318695068, "epoch": 2.45, "learning_rate": 4.1931999624307316e-05, "loss": 42.2875, "step": 2901, "task_loss": 1.085412859916687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3872165051187495, "compression/movement_sparsity/importance_threshold": -0.004358789154135977, "compression/movement_sparsity/linear_layer_sparsity": 0.2809792260013248, "compression/movement_sparsity/model_sparsity": 0.27132672519497536, "compression_loss": 41.58599853515625, "distillation_loss": 0.9530068635940552, "epoch": 2.45, "learning_rate": 4.1927303465765e-05, "loss": 42.3947, "step": 2902, "task_loss": 0.8704487085342407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38782563379668933, "compression/movement_sparsity/importance_threshold": -0.0043544563620535265, "compression/movement_sparsity/linear_layer_sparsity": 0.2817988813604464, "compression/movement_sparsity/model_sparsity": 0.2721182228709563, "compression_loss": 41.65098190307617, "distillation_loss": 0.9211289882659912, "epoch": 2.45, "learning_rate": 4.1922607307222695e-05, "loss": 42.5718, "step": 2903, "task_loss": 0.9204097986221313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.388434358677045, "compression/movement_sparsity/importance_threshold": -0.004350126442222933, "compression/movement_sparsity/linear_layer_sparsity": 0.28260499086513363, "compression/movement_sparsity/model_sparsity": 0.2728966400342746, "compression_loss": 41.7159423828125, "distillation_loss": 0.6705855131149292, "epoch": 2.45, "learning_rate": 4.191791114868038e-05, "loss": 42.5432, "step": 2904, "task_loss": 1.604060411453247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3890426798937018, "compression/movement_sparsity/importance_threshold": -0.004345799393691855, "compression/movement_sparsity/linear_layer_sparsity": 0.28337597177196566, "compression/movement_sparsity/model_sparsity": 0.27364113537514223, "compression_loss": 41.78087615966797, "distillation_loss": 0.5236571431159973, "epoch": 2.46, "learning_rate": 4.191321499013807e-05, "loss": 42.2921, "step": 2905, "task_loss": 0.9844614267349243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38965059758054466, "compression/movement_sparsity/importance_threshold": -0.004341475215507958, "compression/movement_sparsity/linear_layer_sparsity": 0.2841973680595621, "compression/movement_sparsity/model_sparsity": 0.2744343141733492, "compression_loss": 41.84572982788086, "distillation_loss": 0.9856996536254883, "epoch": 2.46, "learning_rate": 4.1908518831595754e-05, "loss": 42.7527, "step": 2906, "task_loss": 0.4859431982040405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39025811187145865, "compression/movement_sparsity/importance_threshold": -0.004337153906718901, "compression/movement_sparsity/linear_layer_sparsity": 0.28505671897274343, "compression/movement_sparsity/model_sparsity": 0.27526414373899016, "compression_loss": 41.910587310791016, "distillation_loss": 0.6351561546325684, "epoch": 2.46, "learning_rate": 4.190382267305345e-05, "loss": 42.754, "step": 2907, "task_loss": 0.7431890964508057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3908652229003293, "compression/movement_sparsity/importance_threshold": -0.004332835466372347, "compression/movement_sparsity/linear_layer_sparsity": 0.28590998856043054, "compression/movement_sparsity/model_sparsity": 0.2760881008913759, "compression_loss": 41.9753532409668, "distillation_loss": 0.6046701669692993, "epoch": 2.46, "learning_rate": 4.1899126514511134e-05, "loss": 42.7985, "step": 2908, "task_loss": 1.4191614389419556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3914719308010416, "compression/movement_sparsity/importance_threshold": -0.004328519893515957, "compression/movement_sparsity/linear_layer_sparsity": 0.28679074335451826, "compression/movement_sparsity/model_sparsity": 0.27693859904876805, "compression_loss": 42.04011154174805, "distillation_loss": 0.887492835521698, "epoch": 2.46, "learning_rate": 4.189443035596882e-05, "loss": 42.9028, "step": 2909, "task_loss": 1.3620884418487549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39207823570748057, "compression/movement_sparsity/importance_threshold": -0.004324207187197395, "compression/movement_sparsity/linear_layer_sparsity": 0.28760430546397797, "compression/movement_sparsity/model_sparsity": 0.277724212796958, "compression_loss": 42.10482406616211, "distillation_loss": 1.2200744152069092, "epoch": 2.46, "learning_rate": 4.1889734197426507e-05, "loss": 43.0304, "step": 2910, "task_loss": 1.1718554496765137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3926841377535315, "compression/movement_sparsity/importance_threshold": -0.004319897346464319, "compression/movement_sparsity/linear_layer_sparsity": 0.288564558683694, "compression/movement_sparsity/model_sparsity": 0.2786514783644924, "compression_loss": 42.16946792602539, "distillation_loss": 0.8713397979736328, "epoch": 2.46, "learning_rate": 4.188503803888419e-05, "loss": 42.9838, "step": 2911, "task_loss": 0.7317988872528076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3932896370730794, "compression/movement_sparsity/importance_threshold": -0.004315590370364393, "compression/movement_sparsity/linear_layer_sparsity": 0.28926670137076416, "compression/movement_sparsity/model_sparsity": 0.27932950029021814, "compression_loss": 42.234092712402344, "distillation_loss": 0.2644381821155548, "epoch": 2.46, "learning_rate": 4.1880341880341886e-05, "loss": 43.0646, "step": 2912, "task_loss": 0.11448072642087936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3938947338000094, "compression/movement_sparsity/importance_threshold": -0.004311286257945281, "compression/movement_sparsity/linear_layer_sparsity": 0.2901304046051326, "compression/movement_sparsity/model_sparsity": 0.2801635326614241, "compression_loss": 42.298683166503906, "distillation_loss": 1.187920331954956, "epoch": 2.46, "learning_rate": 4.187564572179957e-05, "loss": 43.2004, "step": 2913, "task_loss": 0.30098220705986023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3944994280682067, "compression/movement_sparsity/importance_threshold": -0.004306985008254642, "compression/movement_sparsity/linear_layer_sparsity": 0.2909763766022266, "compression/movement_sparsity/model_sparsity": 0.2809804429179036, "compression_loss": 42.36323928833008, "distillation_loss": 0.5736446976661682, "epoch": 2.46, "learning_rate": 4.187094956325726e-05, "loss": 43.2676, "step": 2914, "task_loss": 0.9652964472770691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3951037200115566, "compression/movement_sparsity/importance_threshold": -0.004302686620340138, "compression/movement_sparsity/linear_layer_sparsity": 0.29169146893534925, "compression/movement_sparsity/model_sparsity": 0.28167096962950217, "compression_loss": 42.427764892578125, "distillation_loss": 0.5976279973983765, "epoch": 2.46, "learning_rate": 4.1866253404714945e-05, "loss": 43.1533, "step": 2915, "task_loss": 0.4131671190261841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3957076097639439, "compression/movement_sparsity/importance_threshold": -0.004298391093249432, "compression/movement_sparsity/linear_layer_sparsity": 0.2925309422610817, "compression/movement_sparsity/model_sparsity": 0.28248160446397363, "compression_loss": 42.49223327636719, "distillation_loss": 0.9148129224777222, "epoch": 2.46, "learning_rate": 4.186155724617263e-05, "loss": 43.2806, "step": 2916, "task_loss": 2.0078916549682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39631109745925397, "compression/movement_sparsity/importance_threshold": -0.004294098426030186, "compression/movement_sparsity/linear_layer_sparsity": 0.29337805897826863, "compression/movement_sparsity/model_sparsity": 0.28329962011588944, "compression_loss": 42.55663299560547, "distillation_loss": 0.5069748759269714, "epoch": 2.47, "learning_rate": 4.1856861087630324e-05, "loss": 43.3729, "step": 2917, "task_loss": 0.6330441236495972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39691418323137173, "compression/movement_sparsity/importance_threshold": -0.0042898086177300615, "compression/movement_sparsity/linear_layer_sparsity": 0.29415020845352896, "compression/movement_sparsity/model_sparsity": 0.2840452438812649, "compression_loss": 42.621002197265625, "distillation_loss": 1.5659128427505493, "epoch": 2.47, "learning_rate": 4.1852164929088004e-05, "loss": 43.6358, "step": 2918, "task_loss": 0.6822737455368042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39751686721418267, "compression/movement_sparsity/importance_threshold": -0.00428552166739672, "compression/movement_sparsity/linear_layer_sparsity": 0.294924265795611, "compression/movement_sparsity/model_sparsity": 0.28479270997236755, "compression_loss": 42.68532180786133, "distillation_loss": 0.5952969789505005, "epoch": 2.47, "learning_rate": 4.18474687705457e-05, "loss": 43.5196, "step": 2919, "task_loss": 0.1926359087228775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3981191495415718, "compression/movement_sparsity/importance_threshold": -0.004281237574077822, "compression/movement_sparsity/linear_layer_sparsity": 0.29567236422474974, "compression/movement_sparsity/model_sparsity": 0.2855151089190454, "compression_loss": 42.74961471557617, "distillation_loss": 1.1965268850326538, "epoch": 2.47, "learning_rate": 4.1842772612003383e-05, "loss": 43.5218, "step": 2920, "task_loss": 0.2831389904022217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39872103034742423, "compression/movement_sparsity/importance_threshold": -0.004276956336821031, "compression/movement_sparsity/linear_layer_sparsity": 0.29649069600126376, "compression/movement_sparsity/model_sparsity": 0.28630532848155316, "compression_loss": 42.8138542175293, "distillation_loss": 0.4735006093978882, "epoch": 2.47, "learning_rate": 4.183807645346107e-05, "loss": 43.7261, "step": 2921, "task_loss": 0.6643648147583008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.399322509765625, "compression/movement_sparsity/importance_threshold": -0.004272677954674009, "compression/movement_sparsity/linear_layer_sparsity": 0.2972542719999939, "compression/movement_sparsity/model_sparsity": 0.2870426732956924, "compression_loss": 42.878047943115234, "distillation_loss": 1.2022541761398315, "epoch": 2.47, "learning_rate": 4.1833380294918756e-05, "loss": 43.8029, "step": 2922, "task_loss": 0.9173487424850464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39992358793005933, "compression/movement_sparsity/importance_threshold": -0.004268402426684418, "compression/movement_sparsity/linear_layer_sparsity": 0.2980239293242183, "compression/movement_sparsity/model_sparsity": 0.28778589052308684, "compression_loss": 42.942203521728516, "distillation_loss": 0.7392959594726562, "epoch": 2.47, "learning_rate": 4.182868413637644e-05, "loss": 43.6694, "step": 2923, "task_loss": 0.8415040969848633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40052426497461247, "compression/movement_sparsity/importance_threshold": -0.004264129751899918, "compression/movement_sparsity/linear_layer_sparsity": 0.29881189024576377, "compression/movement_sparsity/model_sparsity": 0.2885467825629259, "compression_loss": 43.00629806518555, "distillation_loss": 0.7537930011749268, "epoch": 2.47, "learning_rate": 4.1823987977834136e-05, "loss": 43.7219, "step": 2924, "task_loss": 1.6594547033309937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4011245410331694, "compression/movement_sparsity/importance_threshold": -0.004259859929368173, "compression/movement_sparsity/linear_layer_sparsity": 0.29983343368712817, "compression/movement_sparsity/model_sparsity": 0.2895332328444442, "compression_loss": 43.07032012939453, "distillation_loss": 0.790442705154419, "epoch": 2.47, "learning_rate": 4.181929181929182e-05, "loss": 43.7723, "step": 2925, "task_loss": 0.2712479829788208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4017244162396153, "compression/movement_sparsity/importance_threshold": -0.004255592958136844, "compression/movement_sparsity/linear_layer_sparsity": 0.3007358666179779, "compression/movement_sparsity/model_sparsity": 0.29040466442791085, "compression_loss": 43.13431930541992, "distillation_loss": 0.9326821565628052, "epoch": 2.47, "learning_rate": 4.181459566074951e-05, "loss": 43.9179, "step": 2926, "task_loss": 0.9961165189743042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40232389072783514, "compression/movement_sparsity/importance_threshold": -0.004251328837253595, "compression/movement_sparsity/linear_layer_sparsity": 0.30174797804274234, "compression/movement_sparsity/model_sparsity": 0.2913820067116156, "compression_loss": 43.19826889038086, "distillation_loss": 0.6597263813018799, "epoch": 2.47, "learning_rate": 4.1809899502207195e-05, "loss": 43.8793, "step": 2927, "task_loss": 0.9838933944702148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4029229646317144, "compression/movement_sparsity/importance_threshold": -0.0042470675657660835, "compression/movement_sparsity/linear_layer_sparsity": 0.3026458678657228, "compression/movement_sparsity/model_sparsity": 0.29224905125694456, "compression_loss": 43.26211929321289, "distillation_loss": 0.7417039275169373, "epoch": 2.47, "learning_rate": 4.180520334366488e-05, "loss": 43.9133, "step": 2928, "task_loss": 0.6979203224182129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4035216380851381, "compression/movement_sparsity/importance_threshold": -0.004242809142721974, "compression/movement_sparsity/linear_layer_sparsity": 0.30356797567317173, "compression/movement_sparsity/model_sparsity": 0.2931394818244722, "compression_loss": 43.32592010498047, "distillation_loss": 0.4007969796657562, "epoch": 2.48, "learning_rate": 4.1800507185122574e-05, "loss": 44.1787, "step": 2929, "task_loss": 0.5585443377494812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4041199112219914, "compression/movement_sparsity/importance_threshold": -0.004238553567168927, "compression/movement_sparsity/linear_layer_sparsity": 0.30445344051347567, "compression/movement_sparsity/model_sparsity": 0.2939945282235032, "compression_loss": 43.38971710205078, "distillation_loss": 0.240255206823349, "epoch": 2.48, "learning_rate": 4.179581102658026e-05, "loss": 43.9921, "step": 2930, "task_loss": 0.6276826858520508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40471778417615933, "compression/movement_sparsity/importance_threshold": -0.004234300838154606, "compression/movement_sparsity/linear_layer_sparsity": 0.30519872483905236, "compression/movement_sparsity/model_sparsity": 0.2947142097397336, "compression_loss": 43.45340347290039, "distillation_loss": 0.5588897466659546, "epoch": 2.48, "learning_rate": 4.179111486803795e-05, "loss": 44.2935, "step": 2931, "task_loss": 0.5142179727554321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40531525708152716, "compression/movement_sparsity/importance_threshold": -0.004230050954726672, "compression/movement_sparsity/linear_layer_sparsity": 0.3059197196351548, "compression/movement_sparsity/model_sparsity": 0.2954104361465505, "compression_loss": 43.51708984375, "distillation_loss": 0.8034572601318359, "epoch": 2.48, "learning_rate": 4.178641870949563e-05, "loss": 44.4712, "step": 2932, "task_loss": 1.5662293434143066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4059123300719799, "compression/movement_sparsity/importance_threshold": -0.004225803915932788, "compression/movement_sparsity/linear_layer_sparsity": 0.3067907085359488, "compression/movement_sparsity/model_sparsity": 0.29625150389912697, "compression_loss": 43.58070373535156, "distillation_loss": 0.7673352956771851, "epoch": 2.48, "learning_rate": 4.178172255095332e-05, "loss": 44.3867, "step": 2933, "task_loss": 0.7774590849876404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4065090032814027, "compression/movement_sparsity/importance_threshold": -0.004221559720820614, "compression/movement_sparsity/linear_layer_sparsity": 0.3076442166069886, "compression/movement_sparsity/model_sparsity": 0.2970756913422286, "compression_loss": 43.644325256347656, "distillation_loss": 0.7542609572410583, "epoch": 2.48, "learning_rate": 4.177702639241101e-05, "loss": 44.6056, "step": 2934, "task_loss": 0.8226514458656311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4071052768436808, "compression/movement_sparsity/importance_threshold": -0.004217318368437813, "compression/movement_sparsity/linear_layer_sparsity": 0.3085141919535335, "compression/movement_sparsity/model_sparsity": 0.29791578035926247, "compression_loss": 43.70786666870117, "distillation_loss": 0.524569571018219, "epoch": 2.48, "learning_rate": 4.177233023386869e-05, "loss": 44.3565, "step": 2935, "task_loss": 0.03765937685966492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4077011508926993, "compression/movement_sparsity/importance_threshold": -0.0042130798578320465, "compression/movement_sparsity/linear_layer_sparsity": 0.30926386437280023, "compression/movement_sparsity/model_sparsity": 0.29863969922466527, "compression_loss": 43.77135467529297, "distillation_loss": 0.6079040765762329, "epoch": 2.48, "learning_rate": 4.1767634075326385e-05, "loss": 44.4629, "step": 2936, "task_loss": 0.5487832427024841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40829662556234336, "compression/movement_sparsity/importance_threshold": -0.004208844188050976, "compression/movement_sparsity/linear_layer_sparsity": 0.31021994413384374, "compression/movement_sparsity/model_sparsity": 0.29956293470467155, "compression_loss": 43.834808349609375, "distillation_loss": 0.685642659664154, "epoch": 2.48, "learning_rate": 4.176293791678407e-05, "loss": 44.7662, "step": 2937, "task_loss": 0.3598397672176361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40889170098649785, "compression/movement_sparsity/importance_threshold": -0.0042046113581422654, "compression/movement_sparsity/linear_layer_sparsity": 0.31105983480544336, "compression/movement_sparsity/model_sparsity": 0.30037397254789583, "compression_loss": 43.8982048034668, "distillation_loss": 0.5516096353530884, "epoch": 2.48, "learning_rate": 4.1758241758241765e-05, "loss": 44.6363, "step": 2938, "task_loss": 0.07959622889757156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40948637729904847, "compression/movement_sparsity/importance_threshold": -0.004200381367153573, "compression/movement_sparsity/linear_layer_sparsity": 0.3118674586794204, "compression/movement_sparsity/model_sparsity": 0.3011538520572601, "compression_loss": 43.96158981323242, "distillation_loss": 1.6964751482009888, "epoch": 2.48, "learning_rate": 4.1753545599699444e-05, "loss": 45.1668, "step": 2939, "task_loss": 1.930273175239563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4100806546338799, "compression/movement_sparsity/importance_threshold": -0.004196154214132563, "compression/movement_sparsity/linear_layer_sparsity": 0.3127378394476649, "compression/movement_sparsity/model_sparsity": 0.301994332568511, "compression_loss": 44.02494430541992, "distillation_loss": 0.80135577917099, "epoch": 2.48, "learning_rate": 4.174884944115714e-05, "loss": 45.0958, "step": 2940, "task_loss": 0.8234436511993408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41067453312487745, "compression/movement_sparsity/importance_threshold": -0.004191929898126898, "compression/movement_sparsity/linear_layer_sparsity": 0.31360464296561874, "compression/movement_sparsity/model_sparsity": 0.30283135871902356, "compression_loss": 44.08826446533203, "distillation_loss": 0.35949623584747314, "epoch": 2.49, "learning_rate": 4.1744153282614824e-05, "loss": 44.6477, "step": 2941, "task_loss": 0.9053521156311035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4112680129059263, "compression/movement_sparsity/importance_threshold": -0.004187708418184237, "compression/movement_sparsity/linear_layer_sparsity": 0.3144363059615496, "compression/movement_sparsity/model_sparsity": 0.3036344515325496, "compression_loss": 44.151554107666016, "distillation_loss": 0.8219202756881714, "epoch": 2.49, "learning_rate": 4.173945712407251e-05, "loss": 44.9083, "step": 2942, "task_loss": 0.7824178338050842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41186109411091154, "compression/movement_sparsity/importance_threshold": -0.004183489773352244, "compression/movement_sparsity/linear_layer_sparsity": 0.3152303005119188, "compression/movement_sparsity/model_sparsity": 0.30440116992750066, "compression_loss": 44.214778900146484, "distillation_loss": 1.006207823753357, "epoch": 2.49, "learning_rate": 4.17347609655302e-05, "loss": 44.9948, "step": 2943, "task_loss": 0.9096312522888184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4124537768737182, "compression/movement_sparsity/importance_threshold": -0.00417927396267858, "compression/movement_sparsity/linear_layer_sparsity": 0.31606365673965403, "compression/movement_sparsity/model_sparsity": 0.30520589780510954, "compression_loss": 44.27799606323242, "distillation_loss": 0.5638241767883301, "epoch": 2.49, "learning_rate": 4.173006480698788e-05, "loss": 44.9377, "step": 2944, "task_loss": 0.63136225938797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41304606132823163, "compression/movement_sparsity/importance_threshold": -0.004175060985210907, "compression/movement_sparsity/linear_layer_sparsity": 0.3168116240029488, "compression/movement_sparsity/model_sparsity": 0.3059281700918937, "compression_loss": 44.34119415283203, "distillation_loss": 1.003166913986206, "epoch": 2.49, "learning_rate": 4.1725368648445576e-05, "loss": 45.0995, "step": 2945, "task_loss": 0.7501367926597595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4136379476083367, "compression/movement_sparsity/importance_threshold": -0.004170850839996888, "compression/movement_sparsity/linear_layer_sparsity": 0.31755848231865347, "compression/movement_sparsity/model_sparsity": 0.30664937152684896, "compression_loss": 44.404327392578125, "distillation_loss": 0.5530438423156738, "epoch": 2.49, "learning_rate": 4.172067248990326e-05, "loss": 45.2704, "step": 2946, "task_loss": 0.7010937929153442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4142294358479186, "compression/movement_sparsity/importance_threshold": -0.004166643526084186, "compression/movement_sparsity/linear_layer_sparsity": 0.3182560342011838, "compression/movement_sparsity/model_sparsity": 0.3073229603562938, "compression_loss": 44.46744918823242, "distillation_loss": 0.494962602853775, "epoch": 2.49, "learning_rate": 4.171597633136095e-05, "loss": 45.2614, "step": 2947, "task_loss": 0.9796982407569885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41482052618086307, "compression/movement_sparsity/importance_threshold": -0.004162439042520456, "compression/movement_sparsity/linear_layer_sparsity": 0.3192437607031329, "compression/movement_sparsity/model_sparsity": 0.30827675541429883, "compression_loss": 44.53047561645508, "distillation_loss": 1.0593167543411255, "epoch": 2.49, "learning_rate": 4.1711280172818635e-05, "loss": 45.641, "step": 2948, "task_loss": 0.4458463788032532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41541121874105424, "compression/movement_sparsity/importance_threshold": -0.004158237388353369, "compression/movement_sparsity/linear_layer_sparsity": 0.32014453617468125, "compression/movement_sparsity/model_sparsity": 0.30914658647729, "compression_loss": 44.59352493286133, "distillation_loss": 1.0590797662734985, "epoch": 2.49, "learning_rate": 4.170658401427632e-05, "loss": 45.6289, "step": 2949, "task_loss": 1.1705948114395142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41600151366237803, "compression/movement_sparsity/importance_threshold": -0.004154038562630581, "compression/movement_sparsity/linear_layer_sparsity": 0.3208374972526718, "compression/movement_sparsity/model_sparsity": 0.30981574221045394, "compression_loss": 44.65651321411133, "distillation_loss": 0.9862306118011475, "epoch": 2.49, "learning_rate": 4.1701887855734014e-05, "loss": 45.5922, "step": 2950, "task_loss": 0.9927714467048645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41659141107871933, "compression/movement_sparsity/importance_threshold": -0.004149842564399754, "compression/movement_sparsity/linear_layer_sparsity": 0.32153811364628454, "compression/movement_sparsity/model_sparsity": 0.310492290275598, "compression_loss": 44.719478607177734, "distillation_loss": 0.9730613827705383, "epoch": 2.49, "learning_rate": 4.16971916971917e-05, "loss": 45.5911, "step": 2951, "task_loss": 1.0644370317459106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41718091112396327, "compression/movement_sparsity/importance_threshold": -0.004145649392708553, "compression/movement_sparsity/linear_layer_sparsity": 0.32222711590062, "compression/movement_sparsity/model_sparsity": 0.31115762318287815, "compression_loss": 44.78240203857422, "distillation_loss": 1.3740832805633545, "epoch": 2.5, "learning_rate": 4.169249553864939e-05, "loss": 45.7673, "step": 2952, "task_loss": 1.0096755027770996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.417770013931995, "compression/movement_sparsity/importance_threshold": -0.004141459046604637, "compression/movement_sparsity/linear_layer_sparsity": 0.3229668077915755, "compression/movement_sparsity/model_sparsity": 0.3118719043818209, "compression_loss": 44.84528350830078, "distillation_loss": 0.7403881549835205, "epoch": 2.5, "learning_rate": 4.168779938010707e-05, "loss": 45.63, "step": 2953, "task_loss": 0.3452562093734741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41835871963669957, "compression/movement_sparsity/importance_threshold": -0.004137271525135669, "compression/movement_sparsity/linear_layer_sparsity": 0.32363404843997545, "compression/movement_sparsity/model_sparsity": 0.31251622326127604, "compression_loss": 44.90812301635742, "distillation_loss": 0.8234454393386841, "epoch": 2.5, "learning_rate": 4.168310322156476e-05, "loss": 45.7479, "step": 2954, "task_loss": 0.4591831564903259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4189470283719622, "compression/movement_sparsity/importance_threshold": -0.004133086827349311, "compression/movement_sparsity/linear_layer_sparsity": 0.3243428686609217, "compression/movement_sparsity/model_sparsity": 0.3132006933270467, "compression_loss": 44.970890045166016, "distillation_loss": 0.9198955297470093, "epoch": 2.5, "learning_rate": 4.167840706302245e-05, "loss": 45.8439, "step": 2955, "task_loss": 2.1602275371551514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41953494027166816, "compression/movement_sparsity/importance_threshold": -0.0041289049522932235, "compression/movement_sparsity/linear_layer_sparsity": 0.3251699527764804, "compression/movement_sparsity/model_sparsity": 0.31399936455882765, "compression_loss": 45.03361892700195, "distillation_loss": 1.0704150199890137, "epoch": 2.5, "learning_rate": 4.167371090448014e-05, "loss": 46.0515, "step": 2956, "task_loss": 0.5061588883399963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4201224554697024, "compression/movement_sparsity/importance_threshold": -0.00412472589901507, "compression/movement_sparsity/linear_layer_sparsity": 0.3257989883917751, "compression/movement_sparsity/model_sparsity": 0.31460679086559706, "compression_loss": 45.096309661865234, "distillation_loss": 1.4239180088043213, "epoch": 2.5, "learning_rate": 4.1669014745937825e-05, "loss": 46.3282, "step": 2957, "task_loss": 1.6169211864471436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4207095740999499, "compression/movement_sparsity/importance_threshold": -0.004120549666562514, "compression/movement_sparsity/linear_layer_sparsity": 0.3266774298973415, "compression/movement_sparsity/model_sparsity": 0.31545505520304506, "compression_loss": 45.158935546875, "distillation_loss": 0.62267005443573, "epoch": 2.5, "learning_rate": 4.166431858739551e-05, "loss": 45.766, "step": 2958, "task_loss": 0.5901462435722351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42129629629629617, "compression/movement_sparsity/importance_threshold": -0.004116376253983214, "compression/movement_sparsity/linear_layer_sparsity": 0.3273446705457415, "compression/movement_sparsity/model_sparsity": 0.3160993740825002, "compression_loss": 45.2215576171875, "distillation_loss": 0.7480114698410034, "epoch": 2.5, "learning_rate": 4.16596224288532e-05, "loss": 46.2158, "step": 2959, "task_loss": 1.0550529956817627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4218826221926263, "compression/movement_sparsity/importance_threshold": -0.004112205660324832, "compression/movement_sparsity/linear_layer_sparsity": 0.3281372699684973, "compression/movement_sparsity/model_sparsity": 0.31686474527676334, "compression_loss": 45.284114837646484, "distillation_loss": 0.5181081295013428, "epoch": 2.5, "learning_rate": 4.165492627031089e-05, "loss": 46.134, "step": 2960, "task_loss": 0.30695840716362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4224685519228254, "compression/movement_sparsity/importance_threshold": -0.00410803788463503, "compression/movement_sparsity/linear_layer_sparsity": 0.3288518614865793, "compression/movement_sparsity/model_sparsity": 0.31755478837785855, "compression_loss": 45.34663009643555, "distillation_loss": 0.8578950762748718, "epoch": 2.5, "learning_rate": 4.165023011176857e-05, "loss": 46.366, "step": 2961, "task_loss": 1.3289865255355835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4230540856207784, "compression/movement_sparsity/importance_threshold": -0.004103872925961472, "compression/movement_sparsity/linear_layer_sparsity": 0.3295814774558825, "compression/movement_sparsity/model_sparsity": 0.3182593397940549, "compression_loss": 45.409141540527344, "distillation_loss": 0.8655504584312439, "epoch": 2.5, "learning_rate": 4.1645533953226264e-05, "loss": 46.2528, "step": 2962, "task_loss": 0.8352756500244141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42363922342037075, "compression/movement_sparsity/importance_threshold": -0.004099710783351819, "compression/movement_sparsity/linear_layer_sparsity": 0.33041199573172036, "compression/movement_sparsity/model_sparsity": 0.3190613272121447, "compression_loss": 45.47157287597656, "distillation_loss": 0.7600586414337158, "epoch": 2.5, "learning_rate": 4.164083779468395e-05, "loss": 46.4202, "step": 2963, "task_loss": 0.788568377494812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42422396545548735, "compression/movement_sparsity/importance_threshold": -0.004095551455853731, "compression/movement_sparsity/linear_layer_sparsity": 0.331202603818481, "compression/movement_sparsity/model_sparsity": 0.3198247754789301, "compression_loss": 45.534000396728516, "distillation_loss": 1.0894231796264648, "epoch": 2.51, "learning_rate": 4.1636141636141643e-05, "loss": 46.3898, "step": 2964, "task_loss": 0.5878629088401794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42480831186001333, "compression/movement_sparsity/importance_threshold": -0.0040913949425148725, "compression/movement_sparsity/linear_layer_sparsity": 0.3318776786450177, "compression/movement_sparsity/model_sparsity": 0.32047665940840225, "compression_loss": 45.59641647338867, "distillation_loss": 0.7180629372596741, "epoch": 2.51, "learning_rate": 4.163144547759932e-05, "loss": 46.5708, "step": 2965, "task_loss": 0.327805757522583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42539226276783404, "compression/movement_sparsity/importance_threshold": -0.004087241242382904, "compression/movement_sparsity/linear_layer_sparsity": 0.3324703336248554, "compression/movement_sparsity/model_sparsity": 0.3210489548664625, "compression_loss": 45.65873336791992, "distillation_loss": 0.778712272644043, "epoch": 2.51, "learning_rate": 4.162674931905701e-05, "loss": 46.6166, "step": 2966, "task_loss": 0.7516246438026428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4259758183128345, "compression/movement_sparsity/importance_threshold": -0.004083090354505487, "compression/movement_sparsity/linear_layer_sparsity": 0.33316501178298547, "compression/movement_sparsity/model_sparsity": 0.3217197686927809, "compression_loss": 45.72102355957031, "distillation_loss": 0.8726775646209717, "epoch": 2.51, "learning_rate": 4.16220531605147e-05, "loss": 46.7511, "step": 2967, "task_loss": 1.386258840560913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4265589786288996, "compression/movement_sparsity/importance_threshold": -0.004078942277930285, "compression/movement_sparsity/linear_layer_sparsity": 0.33399380105451615, "compression/movement_sparsity/model_sparsity": 0.3225200865031804, "compression_loss": 45.78327560424805, "distillation_loss": 0.8169906139373779, "epoch": 2.51, "learning_rate": 4.161735700197239e-05, "loss": 46.6813, "step": 2968, "task_loss": 1.2122083902359009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.427141743849915, "compression/movement_sparsity/importance_threshold": -0.004074797011704957, "compression/movement_sparsity/linear_layer_sparsity": 0.3347086668284538, "compression/movement_sparsity/model_sparsity": 0.32321039443859895, "compression_loss": 45.84549331665039, "distillation_loss": 0.5126519203186035, "epoch": 2.51, "learning_rate": 4.1612660843430075e-05, "loss": 46.7045, "step": 2969, "task_loss": 0.38652852177619934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4277241141097655, "compression/movement_sparsity/importance_threshold": -0.004070654554877167, "compression/movement_sparsity/linear_layer_sparsity": 0.33543711422932865, "compression/movement_sparsity/model_sparsity": 0.32391381743028747, "compression_loss": 45.90763854980469, "distillation_loss": 0.9297010898590088, "epoch": 2.51, "learning_rate": 4.160796468488776e-05, "loss": 46.7647, "step": 2970, "task_loss": 0.848130464553833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4283060895423363, "compression/movement_sparsity/importance_threshold": -0.0040665149064945776, "compression/movement_sparsity/linear_layer_sparsity": 0.3361627236783062, "compression/movement_sparsity/model_sparsity": 0.32461449996245684, "compression_loss": 45.969764709472656, "distillation_loss": 0.37785035371780396, "epoch": 2.51, "learning_rate": 4.1603268526345455e-05, "loss": 46.7517, "step": 2971, "task_loss": 0.6027759313583374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42888767028151265, "compression/movement_sparsity/importance_threshold": -0.004062378065604847, "compression/movement_sparsity/linear_layer_sparsity": 0.3369349446985723, "compression/movement_sparsity/model_sparsity": 0.32536019281504713, "compression_loss": 46.03179168701172, "distillation_loss": 1.0049715042114258, "epoch": 2.51, "learning_rate": 4.159857236780314e-05, "loss": 47.1813, "step": 2972, "task_loss": 0.7254738807678223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4294688564611795, "compression/movement_sparsity/importance_threshold": -0.004058244031255641, "compression/movement_sparsity/linear_layer_sparsity": 0.3376050471472049, "compression/movement_sparsity/model_sparsity": 0.3260072751830929, "compression_loss": 46.093841552734375, "distillation_loss": 0.6662451028823853, "epoch": 2.51, "learning_rate": 4.159387620926083e-05, "loss": 47.1668, "step": 2973, "task_loss": 0.5740063786506653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4300496482152222, "compression/movement_sparsity/importance_threshold": -0.00405411280249462, "compression/movement_sparsity/linear_layer_sparsity": 0.3383606697261219, "compression/movement_sparsity/model_sparsity": 0.3267369398018572, "compression_loss": 46.15583038330078, "distillation_loss": 1.7834644317626953, "epoch": 2.51, "learning_rate": 4.1589180050718514e-05, "loss": 47.0937, "step": 2974, "task_loss": 1.0577107667922974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4306300456775256, "compression/movement_sparsity/importance_threshold": -0.0040499843783694455, "compression/movement_sparsity/linear_layer_sparsity": 0.3390619061764517, "compression/movement_sparsity/model_sparsity": 0.32741408662286253, "compression_loss": 46.217750549316406, "distillation_loss": 1.1757946014404297, "epoch": 2.51, "learning_rate": 4.15844838921762e-05, "loss": 47.3043, "step": 2975, "task_loss": 1.6968662738800049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4312100489819751, "compression/movement_sparsity/importance_threshold": -0.004045858757927779, "compression/movement_sparsity/linear_layer_sparsity": 0.33991993350702554, "compression/movement_sparsity/model_sparsity": 0.3282426380750303, "compression_loss": 46.27967071533203, "distillation_loss": 1.4466767311096191, "epoch": 2.52, "learning_rate": 4.157978773363389e-05, "loss": 47.3954, "step": 2976, "task_loss": 0.8869592547416687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43178965826245586, "compression/movement_sparsity/importance_threshold": -0.004041735940217284, "compression/movement_sparsity/linear_layer_sparsity": 0.34064344430249915, "compression/movement_sparsity/model_sparsity": 0.3289412940488999, "compression_loss": 46.341556549072266, "distillation_loss": 1.0441604852676392, "epoch": 2.52, "learning_rate": 4.157509157509158e-05, "loss": 47.4804, "step": 2977, "task_loss": 0.24130839109420776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4323688736528526, "compression/movement_sparsity/importance_threshold": -0.004037615924285622, "compression/movement_sparsity/linear_layer_sparsity": 0.3414281737746153, "compression/movement_sparsity/model_sparsity": 0.3296990656495386, "compression_loss": 46.40338897705078, "distillation_loss": 2.0068116188049316, "epoch": 2.52, "learning_rate": 4.1570395416549266e-05, "loss": 47.486, "step": 2978, "task_loss": 1.4439735412597656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.432947695287051, "compression/movement_sparsity/importance_threshold": -0.004033498709180453, "compression/movement_sparsity/linear_layer_sparsity": 0.342139796174956, "compression/movement_sparsity/model_sparsity": 0.330386241631221, "compression_loss": 46.465213775634766, "distillation_loss": 1.2940680980682373, "epoch": 2.52, "learning_rate": 4.156569925800695e-05, "loss": 47.5544, "step": 2979, "task_loss": 0.8361204862594604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43352612329893603, "compression/movement_sparsity/importance_threshold": -0.00402938429394944, "compression/movement_sparsity/linear_layer_sparsity": 0.34272021695879923, "compression/movement_sparsity/model_sparsity": 0.33094672317555607, "compression_loss": 46.5269775390625, "distillation_loss": 1.0182418823242188, "epoch": 2.52, "learning_rate": 4.156100309946464e-05, "loss": 47.4441, "step": 2980, "task_loss": 0.8002575039863586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43410415782239276, "compression/movement_sparsity/importance_threshold": -0.004025272677640245, "compression/movement_sparsity/linear_layer_sparsity": 0.3434711414156677, "compression/movement_sparsity/model_sparsity": 0.33167185106721725, "compression_loss": 46.58871078491211, "distillation_loss": 0.9683611989021301, "epoch": 2.52, "learning_rate": 4.155630694092233e-05, "loss": 47.8209, "step": 2981, "task_loss": 0.8051085472106934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4346817989913063, "compression/movement_sparsity/importance_threshold": -0.0040211638593005295, "compression/movement_sparsity/linear_layer_sparsity": 0.34417646785549666, "compression/movement_sparsity/model_sparsity": 0.3323529473740002, "compression_loss": 46.650360107421875, "distillation_loss": 0.9547984004020691, "epoch": 2.52, "learning_rate": 4.155161078238001e-05, "loss": 47.8061, "step": 2982, "task_loss": 0.5127367973327637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43525904693956197, "compression/movement_sparsity/importance_threshold": -0.004017057837977955, "compression/movement_sparsity/linear_layer_sparsity": 0.34488623008568614, "compression/movement_sparsity/model_sparsity": 0.3330383270880986, "compression_loss": 46.712005615234375, "distillation_loss": 0.6433271169662476, "epoch": 2.52, "learning_rate": 4.1546914623837704e-05, "loss": 47.4361, "step": 2983, "task_loss": 1.1239526271820068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4358359018010447, "compression/movement_sparsity/importance_threshold": -0.0040129546127201845, "compression/movement_sparsity/linear_layer_sparsity": 0.3455336527674753, "compression/movement_sparsity/model_sparsity": 0.3336635088090632, "compression_loss": 46.77362060546875, "distillation_loss": 0.7422006130218506, "epoch": 2.52, "learning_rate": 4.154221846529539e-05, "loss": 47.9083, "step": 2984, "task_loss": 0.39419934153556824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43641236370963965, "compression/movement_sparsity/importance_threshold": -0.004008854182574881, "compression/movement_sparsity/linear_layer_sparsity": 0.3461808131175765, "compression/movement_sparsity/model_sparsity": 0.3342884372102403, "compression_loss": 46.83517837524414, "distillation_loss": 0.8376699090003967, "epoch": 2.52, "learning_rate": 4.153752230675308e-05, "loss": 47.7444, "step": 2985, "task_loss": 0.3892742395401001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43698843279923205, "compression/movement_sparsity/importance_threshold": -0.004004756546589702, "compression/movement_sparsity/linear_layer_sparsity": 0.34697642935474415, "compression/movement_sparsity/model_sparsity": 0.33505672158205946, "compression_loss": 46.8967170715332, "distillation_loss": 0.5157288312911987, "epoch": 2.52, "learning_rate": 4.153282614821077e-05, "loss": 47.5839, "step": 2986, "task_loss": 1.085094928741455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43756410920370714, "compression/movement_sparsity/importance_threshold": -0.004000661703812313, "compression/movement_sparsity/linear_layer_sparsity": 0.3476713221478917, "compression/movement_sparsity/model_sparsity": 0.3357277426700221, "compression_loss": 46.9581413269043, "distillation_loss": 1.2159435749053955, "epoch": 2.52, "learning_rate": 4.152812998966845e-05, "loss": 47.9825, "step": 2987, "task_loss": 0.9753211736679077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4381393930569496, "compression/movement_sparsity/importance_threshold": -0.003996569653290377, "compression/movement_sparsity/linear_layer_sparsity": 0.34822708375306405, "compression/movement_sparsity/model_sparsity": 0.33626441215433406, "compression_loss": 47.0195198059082, "distillation_loss": 1.6849617958068848, "epoch": 2.53, "learning_rate": 4.152343383112614e-05, "loss": 48.2003, "step": 2988, "task_loss": 1.7843631505966187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4387142844928451, "compression/movement_sparsity/importance_threshold": -0.003992480394071552, "compression/movement_sparsity/linear_layer_sparsity": 0.348935486628143, "compression/movement_sparsity/model_sparsity": 0.33694847921135196, "compression_loss": 47.08089828491211, "distillation_loss": 0.9843062162399292, "epoch": 2.53, "learning_rate": 4.151873767258383e-05, "loss": 48.1153, "step": 2989, "task_loss": 0.36464327573776245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4392887836452787, "compression/movement_sparsity/importance_threshold": -0.0039883939252035, "compression/movement_sparsity/linear_layer_sparsity": 0.34968081865039025, "compression/movement_sparsity/model_sparsity": 0.3376682067857255, "compression_loss": 47.142215728759766, "distillation_loss": 0.6154696941375732, "epoch": 2.53, "learning_rate": 4.1514041514041515e-05, "loss": 48.0797, "step": 2990, "task_loss": 0.5178979635238647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43986289064813533, "compression/movement_sparsity/importance_threshold": -0.003984310245733886, "compression/movement_sparsity/linear_layer_sparsity": 0.35040777553031066, "compression/movement_sparsity/model_sparsity": 0.3383701904604397, "compression_loss": 47.203514099121094, "distillation_loss": 1.1159164905548096, "epoch": 2.53, "learning_rate": 4.15093453554992e-05, "loss": 48.2101, "step": 2991, "task_loss": 0.8842714428901672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4404366056353003, "compression/movement_sparsity/importance_threshold": -0.0039802293547103705, "compression/movement_sparsity/linear_layer_sparsity": 0.3511621699199612, "compression/movement_sparsity/model_sparsity": 0.3390986690820171, "compression_loss": 47.2647705078125, "distillation_loss": 0.9227436780929565, "epoch": 2.53, "learning_rate": 4.150464919695689e-05, "loss": 48.4477, "step": 2992, "task_loss": 1.4604997634887695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4410099287406586, "compression/movement_sparsity/importance_threshold": -0.003976151251180614, "compression/movement_sparsity/linear_layer_sparsity": 0.3518112381368841, "compression/movement_sparsity/model_sparsity": 0.33972543980892134, "compression_loss": 47.325958251953125, "distillation_loss": 1.1905728578567505, "epoch": 2.53, "learning_rate": 4.149995303841458e-05, "loss": 48.7206, "step": 2993, "task_loss": 1.2154463529586792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44158286009809544, "compression/movement_sparsity/importance_threshold": -0.003972075934192281, "compression/movement_sparsity/linear_layer_sparsity": 0.3525853550998043, "compression/movement_sparsity/model_sparsity": 0.34047296347270295, "compression_loss": 47.38713073730469, "distillation_loss": 1.1524235010147095, "epoch": 2.53, "learning_rate": 4.149525687987227e-05, "loss": 48.2701, "step": 2994, "task_loss": 0.809198260307312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.442155399841496, "compression/movement_sparsity/importance_threshold": -0.003968003402793031, "compression/movement_sparsity/linear_layer_sparsity": 0.35329855149027295, "compression/movement_sparsity/model_sparsity": 0.3411616593731102, "compression_loss": 47.44822692871094, "distillation_loss": 1.6604490280151367, "epoch": 2.53, "learning_rate": 4.1490560721329954e-05, "loss": 48.3713, "step": 2995, "task_loss": 1.538619041442871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44272754810474546, "compression/movement_sparsity/importance_threshold": -0.003963933656030525, "compression/movement_sparsity/linear_layer_sparsity": 0.35402057176479207, "compression/movement_sparsity/model_sparsity": 0.3418588760300055, "compression_loss": 47.50929260253906, "distillation_loss": 0.7487242221832275, "epoch": 2.53, "learning_rate": 4.148586456278764e-05, "loss": 48.4606, "step": 2996, "task_loss": 0.9759997129440308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4432993050217289, "compression/movement_sparsity/importance_threshold": -0.0039598666929524275, "compression/movement_sparsity/linear_layer_sparsity": 0.3547089539624104, "compression/movement_sparsity/model_sparsity": 0.3425236101814243, "compression_loss": 47.570343017578125, "distillation_loss": 1.6162116527557373, "epoch": 2.53, "learning_rate": 4.1481168404245327e-05, "loss": 48.5106, "step": 2997, "task_loss": 0.6641297936439514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44387067072633113, "compression/movement_sparsity/importance_threshold": -0.003955802512606401, "compression/movement_sparsity/linear_layer_sparsity": 0.3553502476220348, "compression/movement_sparsity/model_sparsity": 0.3431428734309905, "compression_loss": 47.63132858276367, "distillation_loss": 0.7055523991584778, "epoch": 2.53, "learning_rate": 4.147647224570302e-05, "loss": 48.6034, "step": 2998, "task_loss": 0.6708745360374451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4444416453524378, "compression/movement_sparsity/importance_threshold": -0.003951741114040105, "compression/movement_sparsity/linear_layer_sparsity": 0.35606666353776506, "compression/movement_sparsity/model_sparsity": 0.3438346782560623, "compression_loss": 47.692283630371094, "distillation_loss": 1.3353440761566162, "epoch": 2.53, "learning_rate": 4.14717760871607e-05, "loss": 48.5988, "step": 2999, "task_loss": 0.665330171585083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.445012229033934, "compression/movement_sparsity/importance_threshold": -0.003947682496301202, "compression/movement_sparsity/linear_layer_sparsity": 0.3567670891446957, "compression/movement_sparsity/model_sparsity": 0.34451104208863365, "compression_loss": 47.75315856933594, "distillation_loss": 1.0968470573425293, "epoch": 2.54, "learning_rate": 4.146707992861839e-05, "loss": 48.7302, "step": 3000, "task_loss": 1.0680959224700928 }, { "epoch": 2.54, "eval_accuracy": 0.8681188118811881, "eval_loss": 48.38743209838867, "eval_runtime": 228.0156, "eval_samples_per_second": 110.738, "eval_steps_per_second": 0.868, "step": 3000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44558242190470465, "compression/movement_sparsity/importance_threshold": -0.003943626658437354, "compression/movement_sparsity/linear_layer_sparsity": 0.3574113996187319, "compression/movement_sparsity/model_sparsity": 0.34513321851575585, "compression_loss": 47.81401824951172, "distillation_loss": 0.5598782300949097, "epoch": 2.54, "learning_rate": 4.146238377007608e-05, "loss": 48.7777, "step": 3001, "task_loss": 0.22090116143226624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.446152224098635, "compression/movement_sparsity/importance_threshold": -0.003939573599496222, "compression/movement_sparsity/linear_layer_sparsity": 0.35808527010433744, "compression/movement_sparsity/model_sparsity": 0.3457839394771127, "compression_loss": 47.874794006347656, "distillation_loss": 0.9683035612106323, "epoch": 2.54, "learning_rate": 4.145768761153377e-05, "loss": 48.9487, "step": 3002, "task_loss": 1.4122188091278076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4467216357496101, "compression/movement_sparsity/importance_threshold": -0.003935523318525468, "compression/movement_sparsity/linear_layer_sparsity": 0.3588183083097521, "compression/movement_sparsity/model_sparsity": 0.34649179556508214, "compression_loss": 47.935585021972656, "distillation_loss": 0.7491236925125122, "epoch": 2.54, "learning_rate": 4.145299145299146e-05, "loss": 49.0646, "step": 3003, "task_loss": 1.0804551839828491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44729065699151516, "compression/movement_sparsity/importance_threshold": -0.003931475814572757, "compression/movement_sparsity/linear_layer_sparsity": 0.359649053144775, "compression/movement_sparsity/model_sparsity": 0.347294001759352, "compression_loss": 47.99628448486328, "distillation_loss": 1.116276502609253, "epoch": 2.54, "learning_rate": 4.144829529444914e-05, "loss": 49.1673, "step": 3004, "task_loss": 1.0563294887542725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4478592879582354, "compression/movement_sparsity/importance_threshold": -0.003927431086685746, "compression/movement_sparsity/linear_layer_sparsity": 0.36029641620572606, "compression/movement_sparsity/model_sparsity": 0.3479191259076376, "compression_loss": 48.056976318359375, "distillation_loss": 0.9487784504890442, "epoch": 2.54, "learning_rate": 4.144359913590683e-05, "loss": 49.004, "step": 3005, "task_loss": 1.745464563369751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4484275287836559, "compression/movement_sparsity/importance_threshold": -0.0039233891339120995, "compression/movement_sparsity/linear_layer_sparsity": 0.3609588871870717, "compression/movement_sparsity/model_sparsity": 0.3485588389727749, "compression_loss": 48.117591857910156, "distillation_loss": 1.4345157146453857, "epoch": 2.54, "learning_rate": 4.143890297736452e-05, "loss": 49.2626, "step": 3006, "task_loss": 1.2622193098068237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4489953796016617, "compression/movement_sparsity/importance_threshold": -0.003919349955299479, "compression/movement_sparsity/linear_layer_sparsity": 0.36168314920510636, "compression/movement_sparsity/model_sparsity": 0.3492582203623995, "compression_loss": 48.17817306518555, "distillation_loss": 0.6625068187713623, "epoch": 2.54, "learning_rate": 4.143420681882221e-05, "loss": 49.0428, "step": 3007, "task_loss": 0.37896668910980225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4495628405461377, "compression/movement_sparsity/importance_threshold": -0.003915313549895549, "compression/movement_sparsity/linear_layer_sparsity": 0.3623184807809128, "compression/movement_sparsity/model_sparsity": 0.34987172634406843, "compression_loss": 48.23872375488281, "distillation_loss": 1.0904512405395508, "epoch": 2.54, "learning_rate": 4.142951066027989e-05, "loss": 49.3413, "step": 3008, "task_loss": 1.9661799669265747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4501299117509697, "compression/movement_sparsity/importance_threshold": -0.003911279916747966, "compression/movement_sparsity/linear_layer_sparsity": 0.36292961822058606, "compression/movement_sparsity/model_sparsity": 0.35046186933261025, "compression_loss": 48.29923629760742, "distillation_loss": 0.9628196954727173, "epoch": 2.54, "learning_rate": 4.142481450173758e-05, "loss": 49.2959, "step": 3009, "task_loss": 0.8923752903938293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45069659335004253, "compression/movement_sparsity/importance_threshold": -0.003907249054904395, "compression/movement_sparsity/linear_layer_sparsity": 0.36363540970295277, "compression/movement_sparsity/model_sparsity": 0.3511434147062891, "compression_loss": 48.35975646972656, "distillation_loss": 1.065292239189148, "epoch": 2.54, "learning_rate": 4.142011834319527e-05, "loss": 49.2471, "step": 3010, "task_loss": 0.9318443536758423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45126288547724125, "compression/movement_sparsity/importance_threshold": -0.0039032209634124976, "compression/movement_sparsity/linear_layer_sparsity": 0.36435758499165116, "compression/movement_sparsity/model_sparsity": 0.35184078105214966, "compression_loss": 48.42021942138672, "distillation_loss": 0.7419764995574951, "epoch": 2.54, "learning_rate": 4.1415422184652956e-05, "loss": 49.4322, "step": 3011, "task_loss": 0.9149347543716431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4518287882664511, "compression/movement_sparsity/importance_threshold": -0.003899195641319935, "compression/movement_sparsity/linear_layer_sparsity": 0.365033804538281, "compression/movement_sparsity/model_sparsity": 0.3524937703770581, "compression_loss": 48.48063659667969, "distillation_loss": 0.5538653135299683, "epoch": 2.55, "learning_rate": 4.141072602611064e-05, "loss": 49.1478, "step": 3012, "task_loss": 0.2962642014026642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45239430185155705, "compression/movement_sparsity/importance_threshold": -0.0038951730876743704, "compression/movement_sparsity/linear_layer_sparsity": 0.3658278348611531, "compression/movement_sparsity/model_sparsity": 0.3532605233156166, "compression_loss": 48.54099655151367, "distillation_loss": 1.606471061706543, "epoch": 2.55, "learning_rate": 4.140602986756833e-05, "loss": 49.804, "step": 3013, "task_loss": 0.6255779266357422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4529594263664445, "compression/movement_sparsity/importance_threshold": -0.0038911533015234642, "compression/movement_sparsity/linear_layer_sparsity": 0.36665687454020407, "compression/movement_sparsity/model_sparsity": 0.3540610829312678, "compression_loss": 48.6013069152832, "distillation_loss": 0.69710373878479, "epoch": 2.55, "learning_rate": 4.140133370902602e-05, "loss": 49.6986, "step": 3014, "task_loss": 0.13577695190906525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45352416194499834, "compression/movement_sparsity/importance_threshold": -0.003887136281914879, "compression/movement_sparsity/linear_layer_sparsity": 0.36720236943704193, "compression/movement_sparsity/model_sparsity": 0.35458783840026065, "compression_loss": 48.66159439086914, "distillation_loss": 0.6722149848937988, "epoch": 2.55, "learning_rate": 4.139663755048371e-05, "loss": 49.6143, "step": 3015, "task_loss": 0.36825031042099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45408850872110396, "compression/movement_sparsity/importance_threshold": -0.0038831220278962757, "compression/movement_sparsity/linear_layer_sparsity": 0.3679077674218767, "compression/movement_sparsity/model_sparsity": 0.3552690037942583, "compression_loss": 48.7217903137207, "distillation_loss": 1.3119579553604126, "epoch": 2.55, "learning_rate": 4.1391941391941394e-05, "loss": 49.8983, "step": 3016, "task_loss": 1.8833730220794678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45465246682864624, "compression/movement_sparsity/importance_threshold": -0.003879110538515318, "compression/movement_sparsity/linear_layer_sparsity": 0.3688087217559396, "compression/movement_sparsity/model_sparsity": 0.35613900757528644, "compression_loss": 48.78200149536133, "distillation_loss": 0.9699065089225769, "epoch": 2.55, "learning_rate": 4.138724523339908e-05, "loss": 49.855, "step": 3017, "task_loss": 1.2321683168411255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4552160364015102, "compression/movement_sparsity/importance_threshold": -0.0038751018128196685, "compression/movement_sparsity/linear_layer_sparsity": 0.36958750106840543, "compression/movement_sparsity/model_sparsity": 0.3568910334225637, "compression_loss": 48.84210205078125, "distillation_loss": 0.6684330701828003, "epoch": 2.55, "learning_rate": 4.138254907485677e-05, "loss": 49.4976, "step": 3018, "task_loss": 0.3626573979854584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45577921757358153, "compression/movement_sparsity/importance_threshold": -0.003871095849856985, "compression/movement_sparsity/linear_layer_sparsity": 0.370305347884252, "compression/movement_sparsity/model_sparsity": 0.35758421999193085, "compression_loss": 48.90217208862305, "distillation_loss": 0.8525633811950684, "epoch": 2.55, "learning_rate": 4.137785291631446e-05, "loss": 49.7906, "step": 3019, "task_loss": 1.1545358896255493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.456342010478745, "compression/movement_sparsity/importance_threshold": -0.003867092648674932, "compression/movement_sparsity/linear_layer_sparsity": 0.37097426984028864, "compression/movement_sparsity/model_sparsity": 0.358230162420933, "compression_loss": 48.96220397949219, "distillation_loss": 0.6798500418663025, "epoch": 2.55, "learning_rate": 4.1373156757772146e-05, "loss": 49.6657, "step": 3020, "task_loss": 0.5701820850372314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4569044152508859, "compression/movement_sparsity/importance_threshold": -0.00386309220832117, "compression/movement_sparsity/linear_layer_sparsity": 0.37174694397892494, "compression/movement_sparsity/model_sparsity": 0.35897629282588345, "compression_loss": 49.02214050292969, "distillation_loss": 1.1562728881835938, "epoch": 2.55, "learning_rate": 4.136846059922983e-05, "loss": 50.1125, "step": 3021, "task_loss": 0.8365206122398376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45746643202388926, "compression/movement_sparsity/importance_threshold": -0.0038590945278433623, "compression/movement_sparsity/linear_layer_sparsity": 0.37249206136615476, "compression/movement_sparsity/model_sparsity": 0.3596958131386127, "compression_loss": 49.08209991455078, "distillation_loss": 0.7138584852218628, "epoch": 2.55, "learning_rate": 4.136376444068752e-05, "loss": 49.9989, "step": 3022, "task_loss": 1.1370450258255005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45802806093164017, "compression/movement_sparsity/importance_threshold": -0.0038550996062891706, "compression/movement_sparsity/linear_layer_sparsity": 0.3732858293573389, "compression/movement_sparsity/model_sparsity": 0.3604623127573837, "compression_loss": 49.141963958740234, "distillation_loss": 0.7289991974830627, "epoch": 2.56, "learning_rate": 4.1359068282145205e-05, "loss": 50.0278, "step": 3023, "task_loss": 0.2928905785083771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45858930210802396, "compression/movement_sparsity/importance_threshold": -0.003851107442706256, "compression/movement_sparsity/linear_layer_sparsity": 0.37411187607031327, "compression/movement_sparsity/model_sparsity": 0.3612599822245505, "compression_loss": 49.201805114746094, "distillation_loss": 0.8476933836936951, "epoch": 2.56, "learning_rate": 4.13543721236029e-05, "loss": 50.1828, "step": 3024, "task_loss": 0.24252696335315704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4591501556869255, "compression/movement_sparsity/importance_threshold": -0.0038471180361422815, "compression/movement_sparsity/linear_layer_sparsity": 0.37480544528085324, "compression/movement_sparsity/model_sparsity": 0.36192972519904, "compression_loss": 49.26160430908203, "distillation_loss": 0.9272516965866089, "epoch": 2.56, "learning_rate": 4.134967596506058e-05, "loss": 50.0309, "step": 3025, "task_loss": 0.814532458782196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4597106218022302, "compression/movement_sparsity/importance_threshold": -0.0038431313856449076, "compression/movement_sparsity/linear_layer_sparsity": 0.3756678010842789, "compression/movement_sparsity/model_sparsity": 0.36276245642770116, "compression_loss": 49.32135772705078, "distillation_loss": 0.8880274295806885, "epoch": 2.56, "learning_rate": 4.134497980651827e-05, "loss": 50.2607, "step": 3026, "task_loss": 1.4027799367904663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46027070058782316, "compression/movement_sparsity/importance_threshold": -0.0038391474902617967, "compression/movement_sparsity/linear_layer_sparsity": 0.37647251546135274, "compression/movement_sparsity/model_sparsity": 0.36353952639033155, "compression_loss": 49.381019592285156, "distillation_loss": 0.8399969339370728, "epoch": 2.56, "learning_rate": 4.134028364797596e-05, "loss": 50.3166, "step": 3027, "task_loss": 0.41276857256889343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46083039217758914, "compression/movement_sparsity/importance_threshold": -0.003835166349040612, "compression/movement_sparsity/linear_layer_sparsity": 0.37728610141914765, "compression/movement_sparsity/model_sparsity": 0.36432516316759306, "compression_loss": 49.44065475463867, "distillation_loss": 0.907224178314209, "epoch": 2.56, "learning_rate": 4.1335587489433644e-05, "loss": 50.5249, "step": 3028, "task_loss": 0.6105089783668518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46138969670541374, "compression/movement_sparsity/importance_threshold": -0.0038311879610290135, "compression/movement_sparsity/linear_layer_sparsity": 0.37799279913825473, "compression/movement_sparsity/model_sparsity": 0.36500758364599234, "compression_loss": 49.50025939941406, "distillation_loss": 0.7092622518539429, "epoch": 2.56, "learning_rate": 4.133089133089133e-05, "loss": 50.7458, "step": 3029, "task_loss": 0.23481151461601257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4619486143051821, "compression/movement_sparsity/importance_threshold": -0.003827212325274663, "compression/movement_sparsity/linear_layer_sparsity": 0.37887212303222617, "compression/movement_sparsity/model_sparsity": 0.36585670005908916, "compression_loss": 49.559852600097656, "distillation_loss": 1.1030921936035156, "epoch": 2.56, "learning_rate": 4.1326195172349016e-05, "loss": 50.4887, "step": 3030, "task_loss": 0.4547232985496521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46250714511077906, "compression/movement_sparsity/importance_threshold": -0.0038232394408252235, "compression/movement_sparsity/linear_layer_sparsity": 0.37958184948991275, "compression/movement_sparsity/model_sparsity": 0.3665420452295802, "compression_loss": 49.61936950683594, "distillation_loss": 1.0254993438720703, "epoch": 2.56, "learning_rate": 4.132149901380671e-05, "loss": 50.4964, "step": 3031, "task_loss": 1.1598517894744873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46306528925609003, "compression/movement_sparsity/importance_threshold": -0.0038192693067283555, "compression/movement_sparsity/linear_layer_sparsity": 0.38039739101119996, "compression/movement_sparsity/model_sparsity": 0.367329570390712, "compression_loss": 49.67885208129883, "distillation_loss": 1.2935999631881714, "epoch": 2.56, "learning_rate": 4.1316802855264396e-05, "loss": 50.834, "step": 3032, "task_loss": 1.1587638854980469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.463623046875, "compression/movement_sparsity/importance_threshold": -0.003815301922031722, "compression/movement_sparsity/linear_layer_sparsity": 0.381101632351774, "compression/movement_sparsity/model_sparsity": 0.3680096188747376, "compression_loss": 49.73830032348633, "distillation_loss": 0.6887329816818237, "epoch": 2.56, "learning_rate": 4.131210669672209e-05, "loss": 50.6941, "step": 3033, "task_loss": 1.4749889373779297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4641804181013941, "compression/movement_sparsity/importance_threshold": -0.0038113372857829847, "compression/movement_sparsity/linear_layer_sparsity": 0.3819125353718509, "compression/movement_sparsity/model_sparsity": 0.36879266488144535, "compression_loss": 49.7977180480957, "distillation_loss": 1.4097110033035278, "epoch": 2.56, "learning_rate": 4.130741053817977e-05, "loss": 50.8854, "step": 3034, "task_loss": 1.3078335523605347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46473740306915756, "compression/movement_sparsity/importance_threshold": -0.003807375397029805, "compression/movement_sparsity/linear_layer_sparsity": 0.38274639241462677, "compression/movement_sparsity/model_sparsity": 0.3695978763695576, "compression_loss": 49.85708236694336, "distillation_loss": 1.2002642154693604, "epoch": 2.57, "learning_rate": 4.130271437963746e-05, "loss": 50.9491, "step": 3035, "task_loss": 0.7746964693069458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4652940019121755, "compression/movement_sparsity/importance_threshold": -0.003803416254819845, "compression/movement_sparsity/linear_layer_sparsity": 0.3835135933603182, "compression/movement_sparsity/model_sparsity": 0.37033872160257836, "compression_loss": 49.91639709472656, "distillation_loss": 0.9503117799758911, "epoch": 2.57, "learning_rate": 4.129801822109515e-05, "loss": 50.7504, "step": 3036, "task_loss": 1.690313696861267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46585021476433297, "compression/movement_sparsity/importance_threshold": -0.003799459858200767, "compression/movement_sparsity/linear_layer_sparsity": 0.38416084910376047, "compression/movement_sparsity/model_sparsity": 0.3709637421200418, "compression_loss": 49.97568130493164, "distillation_loss": 0.8271883130073547, "epoch": 2.57, "learning_rate": 4.1293322062552834e-05, "loss": 51.0494, "step": 3037, "task_loss": 0.8465777039527893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4664060417595153, "compression/movement_sparsity/importance_threshold": -0.0037955062062202316, "compression/movement_sparsity/linear_layer_sparsity": 0.38495217264057924, "compression/movement_sparsity/model_sparsity": 0.37172788125897493, "compression_loss": 50.034908294677734, "distillation_loss": 0.6918247938156128, "epoch": 2.57, "learning_rate": 4.128862590401052e-05, "loss": 50.9274, "step": 3038, "task_loss": 0.7032532095909119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4669614830316071, "compression/movement_sparsity/importance_threshold": -0.003791555297925904, "compression/movement_sparsity/linear_layer_sparsity": 0.3856208918857661, "compression/movement_sparsity/model_sparsity": 0.3723736279408686, "compression_loss": 50.094146728515625, "distillation_loss": 2.0564489364624023, "epoch": 2.57, "learning_rate": 4.128392974546821e-05, "loss": 51.3653, "step": 3039, "task_loss": 1.5027828216552734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4675165387144943, "compression/movement_sparsity/importance_threshold": -0.0037876071323654412, "compression/movement_sparsity/linear_layer_sparsity": 0.38621407152897974, "compression/movement_sparsity/model_sparsity": 0.3729464300385038, "compression_loss": 50.153324127197266, "distillation_loss": 1.0967414379119873, "epoch": 2.57, "learning_rate": 4.12792335869259e-05, "loss": 51.5354, "step": 3040, "task_loss": 1.325130820274353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46807120894206156, "compression/movement_sparsity/importance_threshold": -0.003783661708586508, "compression/movement_sparsity/linear_layer_sparsity": 0.38694755092859695, "compression/movement_sparsity/model_sparsity": 0.3736547121642976, "compression_loss": 50.21247482299805, "distillation_loss": 1.3885177373886108, "epoch": 2.57, "learning_rate": 4.1274537428383586e-05, "loss": 51.3596, "step": 3041, "task_loss": 1.1564867496490479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4686254938481941, "compression/movement_sparsity/importance_threshold": -0.0037797190256367657, "compression/movement_sparsity/linear_layer_sparsity": 0.3875707198534148, "compression/movement_sparsity/model_sparsity": 0.3742564733194561, "compression_loss": 50.271583557128906, "distillation_loss": 1.2108824253082275, "epoch": 2.57, "learning_rate": 4.126984126984127e-05, "loss": 51.5936, "step": 3042, "task_loss": 0.9624378085136414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4691793935667772, "compression/movement_sparsity/importance_threshold": -0.0037757790825638754, "compression/movement_sparsity/linear_layer_sparsity": 0.38826261968048575, "compression/movement_sparsity/model_sparsity": 0.3749246042589343, "compression_loss": 50.3306884765625, "distillation_loss": 1.3404388427734375, "epoch": 2.57, "learning_rate": 4.126514511129896e-05, "loss": 51.5479, "step": 3043, "task_loss": 1.6564874649047852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4697329082316958, "compression/movement_sparsity/importance_threshold": -0.0037718418784155, "compression/movement_sparsity/linear_layer_sparsity": 0.3890353653641278, "compression/movement_sparsity/model_sparsity": 0.3756708037510995, "compression_loss": 50.38972091674805, "distillation_loss": 1.4668290615081787, "epoch": 2.57, "learning_rate": 4.1260448952756645e-05, "loss": 51.8125, "step": 3044, "task_loss": 1.026339054107666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4702860379768351, "compression/movement_sparsity/importance_threshold": -0.003767907412239301, "compression/movement_sparsity/linear_layer_sparsity": 0.38980176739058764, "compression/movement_sparsity/model_sparsity": 0.37641087751022206, "compression_loss": 50.44871520996094, "distillation_loss": 1.0186166763305664, "epoch": 2.57, "learning_rate": 4.125575279421434e-05, "loss": 51.4643, "step": 3045, "task_loss": 1.0154277086257935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4708387829360803, "compression/movement_sparsity/importance_threshold": -0.00376397568308294, "compression/movement_sparsity/linear_layer_sparsity": 0.39043422524199384, "compression/movement_sparsity/model_sparsity": 0.3770216084887645, "compression_loss": 50.507686614990234, "distillation_loss": 0.750328779220581, "epoch": 2.57, "learning_rate": 4.125105663567202e-05, "loss": 51.202, "step": 3046, "task_loss": 0.7444518804550171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47139114324331644, "compression/movement_sparsity/importance_threshold": -0.00376004668999408, "compression/movement_sparsity/linear_layer_sparsity": 0.3911733686212381, "compression/movement_sparsity/model_sparsity": 0.3777353600190607, "compression_loss": 50.56656265258789, "distillation_loss": 0.7425801157951355, "epoch": 2.58, "learning_rate": 4.124636047712971e-05, "loss": 51.4702, "step": 3047, "task_loss": 0.8226586580276489 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4719431190324288, "compression/movement_sparsity/importance_threshold": -0.003756120432020381, "compression/movement_sparsity/linear_layer_sparsity": 0.3918947688390401, "compression/movement_sparsity/model_sparsity": 0.37843197792009464, "compression_loss": 50.625423431396484, "distillation_loss": 1.5537610054016113, "epoch": 2.58, "learning_rate": 4.12416643185874e-05, "loss": 51.565, "step": 3048, "task_loss": 1.8071502447128296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4724947104373022, "compression/movement_sparsity/importance_threshold": -0.0037521969082095078, "compression/movement_sparsity/linear_layer_sparsity": 0.3926854603949742, "compression/movement_sparsity/model_sparsity": 0.3791955067886306, "compression_loss": 50.684242248535156, "distillation_loss": 1.3142611980438232, "epoch": 2.58, "learning_rate": 4.1236968160045084e-05, "loss": 51.7009, "step": 3049, "task_loss": 1.4460989236831665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47304591759182224, "compression/movement_sparsity/importance_threshold": -0.0037482761176091182, "compression/movement_sparsity/linear_layer_sparsity": 0.39343643254851324, "compression/movement_sparsity/model_sparsity": 0.379920680738435, "compression_loss": 50.74298858642578, "distillation_loss": 0.9851248264312744, "epoch": 2.58, "learning_rate": 4.123227200150278e-05, "loss": 52.0112, "step": 3050, "task_loss": 1.4789472818374634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4735967406298738, "compression/movement_sparsity/importance_threshold": -0.0037443580592668767, "compression/movement_sparsity/linear_layer_sparsity": 0.3941416278224982, "compression/movement_sparsity/model_sparsity": 0.3806016503853242, "compression_loss": 50.80170822143555, "distillation_loss": 1.1012539863586426, "epoch": 2.58, "learning_rate": 4.122757584296046e-05, "loss": 51.8004, "step": 3051, "task_loss": 0.6958345174789429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47414717968534215, "compression/movement_sparsity/importance_threshold": -0.003740442732230444, "compression/movement_sparsity/linear_layer_sparsity": 0.394840801391827, "compression/movement_sparsity/model_sparsity": 0.38127680519163704, "compression_loss": 50.8603630065918, "distillation_loss": 1.4485851526260376, "epoch": 2.58, "learning_rate": 4.122287968441815e-05, "loss": 52.0106, "step": 3052, "task_loss": 1.2276757955551147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47469723489211224, "compression/movement_sparsity/importance_threshold": -0.0037365301355474835, "compression/movement_sparsity/linear_layer_sparsity": 0.39557062007198, "compression/movement_sparsity/model_sparsity": 0.3819815523549419, "compression_loss": 50.91899108886719, "distillation_loss": 0.826873779296875, "epoch": 2.58, "learning_rate": 4.1218183525875836e-05, "loss": 51.8911, "step": 3053, "task_loss": 0.3747551441192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4752469063840694, "compression/movement_sparsity/importance_threshold": -0.0037326202682656548, "compression/movement_sparsity/linear_layer_sparsity": 0.39634166059965015, "compression/movement_sparsity/model_sparsity": 0.3827261052684885, "compression_loss": 50.97757339477539, "distillation_loss": 1.211850643157959, "epoch": 2.58, "learning_rate": 4.121348736733352e-05, "loss": 52.22, "step": 3054, "task_loss": 1.2509483098983765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4757961942950987, "compression/movement_sparsity/importance_threshold": -0.0037287131294326216, "compression/movement_sparsity/linear_layer_sparsity": 0.39712946650701636, "compression/movement_sparsity/model_sparsity": 0.38348684761936225, "compression_loss": 51.036128997802734, "distillation_loss": 1.382433533668518, "epoch": 2.58, "learning_rate": 4.120879120879121e-05, "loss": 52.3868, "step": 3055, "task_loss": 1.1847749948501587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47634509875908515, "compression/movement_sparsity/importance_threshold": -0.0037248087180960453, "compression/movement_sparsity/linear_layer_sparsity": 0.39790524092923796, "compression/movement_sparsity/model_sparsity": 0.3842359718036193, "compression_loss": 51.09466552734375, "distillation_loss": 0.6754745244979858, "epoch": 2.58, "learning_rate": 4.1204095050248895e-05, "loss": 52.1746, "step": 3056, "task_loss": 0.7180325984954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4768936199099141, "compression/movement_sparsity/importance_threshold": -0.0037209070333035876, "compression/movement_sparsity/linear_layer_sparsity": 0.3985598896566145, "compression/movement_sparsity/model_sparsity": 0.38486813133327535, "compression_loss": 51.15315246582031, "distillation_loss": 1.1927275657653809, "epoch": 2.58, "learning_rate": 4.119939889170659e-05, "loss": 52.2787, "step": 3057, "task_loss": 0.9376187324523926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4774417578814706, "compression/movement_sparsity/importance_threshold": -0.0037170080741029104, "compression/movement_sparsity/linear_layer_sparsity": 0.39912212608481307, "compression/movement_sparsity/model_sparsity": 0.38541105321052377, "compression_loss": 51.211605072021484, "distillation_loss": 1.4279518127441406, "epoch": 2.58, "learning_rate": 4.1194702733164275e-05, "loss": 52.4378, "step": 3058, "task_loss": 1.4196845293045044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47798951280763957, "compression/movement_sparsity/importance_threshold": -0.0037131118395416765, "compression/movement_sparsity/linear_layer_sparsity": 0.39992670929604296, "compression/movement_sparsity/model_sparsity": 0.3861879965132604, "compression_loss": 51.27001953125, "distillation_loss": 0.8734998106956482, "epoch": 2.59, "learning_rate": 4.119000657462196e-05, "loss": 52.3642, "step": 3059, "task_loss": 0.8366615176200867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4785368848223065, "compression/movement_sparsity/importance_threshold": -0.003709218328667546, "compression/movement_sparsity/linear_layer_sparsity": 0.4008185416418644, "compression/movement_sparsity/model_sparsity": 0.38704919167440566, "compression_loss": 51.328365325927734, "distillation_loss": 0.5253501534461975, "epoch": 2.59, "learning_rate": 4.118531041607965e-05, "loss": 52.3233, "step": 3060, "task_loss": 1.4695879220962524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4790838740593565, "compression/movement_sparsity/importance_threshold": -0.0037053275405281813, "compression/movement_sparsity/linear_layer_sparsity": 0.4015099883505652, "compression/movement_sparsity/model_sparsity": 0.38771688506152374, "compression_loss": 51.38669204711914, "distillation_loss": 0.9726161360740662, "epoch": 2.59, "learning_rate": 4.1180614257537334e-05, "loss": 52.1983, "step": 3061, "task_loss": 0.5793732404708862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47963048065267455, "compression/movement_sparsity/importance_threshold": -0.0037014394741712443, "compression/movement_sparsity/linear_layer_sparsity": 0.40196662435018055, "compression/movement_sparsity/model_sparsity": 0.3881578342097757, "compression_loss": 51.44495391845703, "distillation_loss": 1.2422739267349243, "epoch": 2.59, "learning_rate": 4.117591809899503e-05, "loss": 52.6784, "step": 3062, "task_loss": 1.045922040939331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48017670473614593, "compression/movement_sparsity/importance_threshold": -0.0036975541286443964, "compression/movement_sparsity/linear_layer_sparsity": 0.40275941455961856, "compression/movement_sparsity/model_sparsity": 0.3889233896366115, "compression_loss": 51.50320053100586, "distillation_loss": 0.9213982820510864, "epoch": 2.59, "learning_rate": 4.117122194045271e-05, "loss": 53.1972, "step": 3063, "task_loss": 2.0543627738952637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48072254644365564, "compression/movement_sparsity/importance_threshold": -0.003693671502995301, "compression/movement_sparsity/linear_layer_sparsity": 0.40355964544966133, "compression/movement_sparsity/model_sparsity": 0.3896961301337831, "compression_loss": 51.56140899658203, "distillation_loss": 0.9550180435180664, "epoch": 2.59, "learning_rate": 4.11665257819104e-05, "loss": 52.4633, "step": 3064, "task_loss": 1.3067706823349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4812680059090889, "compression/movement_sparsity/importance_threshold": -0.0036897915962716184, "compression/movement_sparsity/linear_layer_sparsity": 0.40431390867346784, "compression/movement_sparsity/model_sparsity": 0.3904244820954668, "compression_loss": 51.619529724121094, "distillation_loss": 1.7637337446212769, "epoch": 2.59, "learning_rate": 4.1161829623368086e-05, "loss": 52.7817, "step": 3065, "task_loss": 1.7388800382614136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4818130832663309, "compression/movement_sparsity/importance_threshold": -0.0036859144075210115, "compression/movement_sparsity/linear_layer_sparsity": 0.40502243079022315, "compression/movement_sparsity/model_sparsity": 0.39110866429784263, "compression_loss": 51.67762756347656, "distillation_loss": 0.869659423828125, "epoch": 2.59, "learning_rate": 4.115713346482578e-05, "loss": 52.5816, "step": 3066, "task_loss": 0.2731804847717285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48235777864926654, "compression/movement_sparsity/importance_threshold": -0.003682039935791142, "compression/movement_sparsity/linear_layer_sparsity": 0.40577843494250454, "compression/movement_sparsity/model_sparsity": 0.3918386973817523, "compression_loss": 51.73568344116211, "distillation_loss": 0.9186686277389526, "epoch": 2.59, "learning_rate": 4.1152437306283465e-05, "loss": 53.2227, "step": 3067, "task_loss": 2.569932222366333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48290209219178126, "compression/movement_sparsity/importance_threshold": -0.003678168180129671, "compression/movement_sparsity/linear_layer_sparsity": 0.40656963923764694, "compression/movement_sparsity/model_sparsity": 0.3926027213753275, "compression_loss": 51.793724060058594, "distillation_loss": 1.025024652481079, "epoch": 2.59, "learning_rate": 4.1147741147741145e-05, "loss": 53.0395, "step": 3068, "task_loss": 1.0355565547943115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4834460240277598, "compression/movement_sparsity/importance_threshold": -0.0036742991395842628, "compression/movement_sparsity/linear_layer_sparsity": 0.40736180939036787, "compression/movement_sparsity/model_sparsity": 0.393367678046302, "compression_loss": 51.85167694091797, "distillation_loss": 1.0266690254211426, "epoch": 2.59, "learning_rate": 4.114304498919884e-05, "loss": 52.9247, "step": 3069, "task_loss": 0.4221527576446533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4839895742910879, "compression/movement_sparsity/importance_threshold": -0.0036704328132025747, "compression/movement_sparsity/linear_layer_sparsity": 0.4081738332822025, "compression/movement_sparsity/model_sparsity": 0.3941518064193744, "compression_loss": 51.909610748291016, "distillation_loss": 2.0047271251678467, "epoch": 2.59, "learning_rate": 4.1138348830656524e-05, "loss": 53.4215, "step": 3070, "task_loss": 1.917396068572998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4845327431156503, "compression/movement_sparsity/importance_threshold": -0.003666569200032272, "compression/movement_sparsity/linear_layer_sparsity": 0.4089986756542457, "compression/movement_sparsity/model_sparsity": 0.39494831291842597, "compression_loss": 51.967533111572266, "distillation_loss": 1.4897263050079346, "epoch": 2.6, "learning_rate": 4.113365267211422e-05, "loss": 53.4788, "step": 3071, "task_loss": 1.5780733823776245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48507553063533226, "compression/movement_sparsity/importance_threshold": -0.0036627082991210155, "compression/movement_sparsity/linear_layer_sparsity": 0.40979909733097064, "compression/movement_sparsity/model_sparsity": 0.39572123764817035, "compression_loss": 52.02539825439453, "distillation_loss": 1.71376633644104, "epoch": 2.6, "learning_rate": 4.11289565135719e-05, "loss": 53.4324, "step": 3072, "task_loss": 0.9465539455413818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4856179369840188, "compression/movement_sparsity/importance_threshold": -0.0036588501095164675, "compression/movement_sparsity/linear_layer_sparsity": 0.4105489724610871, "compression/movement_sparsity/model_sparsity": 0.3964453522606816, "compression_loss": 52.083221435546875, "distillation_loss": 1.1375422477722168, "epoch": 2.6, "learning_rate": 4.112426035502959e-05, "loss": 53.4026, "step": 3073, "task_loss": 0.936212420463562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4861599622955952, "compression/movement_sparsity/importance_threshold": -0.003654994630266289, "compression/movement_sparsity/linear_layer_sparsity": 0.41126369514501315, "compression/movement_sparsity/model_sparsity": 0.39713552202167063, "compression_loss": 52.140995025634766, "distillation_loss": 1.3847780227661133, "epoch": 2.6, "learning_rate": 4.1119564196487276e-05, "loss": 53.5469, "step": 3074, "task_loss": 1.1868997812271118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48670160670394647, "compression/movement_sparsity/importance_threshold": -0.0036511418604181435, "compression/movement_sparsity/linear_layer_sparsity": 0.41204259369915536, "compression/movement_sparsity/model_sparsity": 0.3978876630143058, "compression_loss": 52.19874954223633, "distillation_loss": 1.3466531038284302, "epoch": 2.6, "learning_rate": 4.111486803794496e-05, "loss": 53.3238, "step": 3075, "task_loss": 0.9713649153709412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4872428703429579, "compression/movement_sparsity/importance_threshold": -0.0036472917990196907, "compression/movement_sparsity/linear_layer_sparsity": 0.41272636124389867, "compression/movement_sparsity/model_sparsity": 0.39854794104037217, "compression_loss": 52.25645446777344, "distillation_loss": 0.9357100129127502, "epoch": 2.6, "learning_rate": 4.111017187940265e-05, "loss": 53.3275, "step": 3076, "task_loss": 0.8333291411399841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48778375334651447, "compression/movement_sparsity/importance_threshold": -0.0036434444451185943, "compression/movement_sparsity/linear_layer_sparsity": 0.41356953106159816, "compression/movement_sparsity/model_sparsity": 0.39936214538093995, "compression_loss": 52.314109802246094, "distillation_loss": 1.2986867427825928, "epoch": 2.6, "learning_rate": 4.1105475720860335e-05, "loss": 53.3159, "step": 3077, "task_loss": 1.350926160812378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4883242558485015, "compression/movement_sparsity/importance_threshold": -0.0036395997977625148, "compression/movement_sparsity/linear_layer_sparsity": 0.41430215192114556, "compression/movement_sparsity/model_sparsity": 0.4000695984601565, "compression_loss": 52.3717041015625, "distillation_loss": 0.9928900003433228, "epoch": 2.6, "learning_rate": 4.110077956231803e-05, "loss": 53.5345, "step": 3078, "task_loss": 1.0358116626739502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4888643779828038, "compression/movement_sparsity/importance_threshold": -0.003635757855999116, "compression/movement_sparsity/linear_layer_sparsity": 0.41514014665009114, "compression/movement_sparsity/model_sparsity": 0.40087880549218946, "compression_loss": 52.42930221557617, "distillation_loss": 0.7367823719978333, "epoch": 2.6, "learning_rate": 4.1096083403775715e-05, "loss": 53.6794, "step": 3079, "task_loss": 0.7170791625976562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4894041198833069, "compression/movement_sparsity/importance_threshold": -0.0036319186188760576, "compression/movement_sparsity/linear_layer_sparsity": 0.4158943621772271, "compression/movement_sparsity/model_sparsity": 0.40160711139572997, "compression_loss": 52.48683547973633, "distillation_loss": 2.1020989418029785, "epoch": 2.6, "learning_rate": 4.10913872452334e-05, "loss": 53.8485, "step": 3080, "task_loss": 1.6170066595077515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4899434816838958, "compression/movement_sparsity/importance_threshold": -0.003628082085441002, "compression/movement_sparsity/linear_layer_sparsity": 0.4166072366151695, "compression/movement_sparsity/model_sparsity": 0.4022954964036708, "compression_loss": 52.54432678222656, "distillation_loss": 0.9117029905319214, "epoch": 2.6, "learning_rate": 4.108669108669109e-05, "loss": 53.386, "step": 3081, "task_loss": 1.1352143287658691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4904824635184556, "compression/movement_sparsity/importance_threshold": -0.0036242482547416115, "compression/movement_sparsity/linear_layer_sparsity": 0.4172836707968168, "compression/movement_sparsity/model_sparsity": 0.4029486929902235, "compression_loss": 52.601806640625, "distillation_loss": 2.0378811359405518, "epoch": 2.6, "learning_rate": 4.1081994928148774e-05, "loss": 53.6064, "step": 3082, "task_loss": 1.6460951566696167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49102106552087144, "compression/movement_sparsity/importance_threshold": -0.003620417125825548, "compression/movement_sparsity/linear_layer_sparsity": 0.4180330808843955, "compression/movement_sparsity/model_sparsity": 0.4036723585358388, "compression_loss": 52.659210205078125, "distillation_loss": 1.176930546760559, "epoch": 2.61, "learning_rate": 4.107729876960647e-05, "loss": 54.2437, "step": 3083, "task_loss": 0.5851922035217285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49155928782502856, "compression/movement_sparsity/importance_threshold": -0.003616588697740472, "compression/movement_sparsity/linear_layer_sparsity": 0.4186500134695398, "compression/movement_sparsity/model_sparsity": 0.40426809758877674, "compression_loss": 52.71659851074219, "distillation_loss": 1.5501749515533447, "epoch": 2.61, "learning_rate": 4.107260261106415e-05, "loss": 54.0008, "step": 3084, "task_loss": 1.062578558921814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49209713056481197, "compression/movement_sparsity/importance_threshold": -0.0036127629695340475, "compression/movement_sparsity/linear_layer_sparsity": 0.4193819665756996, "compression/movement_sparsity/model_sparsity": 0.4049749058539888, "compression_loss": 52.7739372253418, "distillation_loss": 0.9844653010368347, "epoch": 2.61, "learning_rate": 4.106790645252184e-05, "loss": 53.9447, "step": 3085, "task_loss": 1.6032243967056274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4926345938741069, "compression/movement_sparsity/importance_threshold": -0.003608939940253935, "compression/movement_sparsity/linear_layer_sparsity": 0.4200267659406089, "compression/movement_sparsity/model_sparsity": 0.4055975543770786, "compression_loss": 52.83121871948242, "distillation_loss": 0.7799921631813049, "epoch": 2.61, "learning_rate": 4.1063210293979526e-05, "loss": 53.7783, "step": 3086, "task_loss": 0.473429799079895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4931716778867985, "compression/movement_sparsity/importance_threshold": -0.003605119608947796, "compression/movement_sparsity/linear_layer_sparsity": 0.4207535081855118, "compression/movement_sparsity/model_sparsity": 0.4062993307901485, "compression_loss": 52.888450622558594, "distillation_loss": 0.4423511326313019, "epoch": 2.61, "learning_rate": 4.105851413543721e-05, "loss": 54.0159, "step": 3087, "task_loss": 0.37720760703086853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49370838273677176, "compression/movement_sparsity/importance_threshold": -0.003601301974663294, "compression/movement_sparsity/linear_layer_sparsity": 0.42155656510328426, "compression/movement_sparsity/model_sparsity": 0.40707480023230347, "compression_loss": 52.9456787109375, "distillation_loss": 0.6550344824790955, "epoch": 2.61, "learning_rate": 4.1053817976894905e-05, "loss": 53.9305, "step": 3088, "task_loss": 1.2848150730133057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4942447085579118, "compression/movement_sparsity/importance_threshold": -0.003597487036448091, "compression/movement_sparsity/linear_layer_sparsity": 0.422297067837639, "compression/movement_sparsity/model_sparsity": 0.40778986441968024, "compression_loss": 53.0028190612793, "distillation_loss": 1.5023353099822998, "epoch": 2.61, "learning_rate": 4.1049121818352585e-05, "loss": 54.3121, "step": 3089, "task_loss": 1.2272896766662598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4947806554841041, "compression/movement_sparsity/importance_threshold": -0.0035936747933498457, "compression/movement_sparsity/linear_layer_sparsity": 0.42303165618484634, "compression/movement_sparsity/model_sparsity": 0.40849921739730294, "compression_loss": 53.05994415283203, "distillation_loss": 1.7257694005966187, "epoch": 2.61, "learning_rate": 4.104442565981028e-05, "loss": 54.4724, "step": 3090, "task_loss": 1.4664872884750366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49531622364923356, "compression/movement_sparsity/importance_threshold": -0.0035898652444162227, "compression/movement_sparsity/linear_layer_sparsity": 0.42376263150926, "compression/movement_sparsity/model_sparsity": 0.4092050814705799, "compression_loss": 53.117027282714844, "distillation_loss": 1.5110735893249512, "epoch": 2.61, "learning_rate": 4.1039729501267964e-05, "loss": 54.2917, "step": 3091, "task_loss": 0.7679189443588257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4958514131871853, "compression/movement_sparsity/importance_threshold": -0.003586058388694883, "compression/movement_sparsity/linear_layer_sparsity": 0.42435793365431285, "compression/movement_sparsity/model_sparsity": 0.40977993315558653, "compression_loss": 53.174072265625, "distillation_loss": 0.6915276646614075, "epoch": 2.61, "learning_rate": 4.103503334272565e-05, "loss": 54.2216, "step": 3092, "task_loss": 0.35551342368125916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49638622423184464, "compression/movement_sparsity/importance_threshold": -0.003582254225233488, "compression/movement_sparsity/linear_layer_sparsity": 0.4249873985396424, "compression/movement_sparsity/model_sparsity": 0.4103877739856445, "compression_loss": 53.231075286865234, "distillation_loss": 1.193389892578125, "epoch": 2.61, "learning_rate": 4.103033718418334e-05, "loss": 54.3929, "step": 3093, "task_loss": 0.4221054017543793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4969206569170964, "compression/movement_sparsity/importance_threshold": -0.0035784527530797012, "compression/movement_sparsity/linear_layer_sparsity": 0.4257046491471072, "compression/movement_sparsity/model_sparsity": 0.41108038482822196, "compression_loss": 53.288021087646484, "distillation_loss": 1.6217548847198486, "epoch": 2.61, "learning_rate": 4.1025641025641023e-05, "loss": 54.4132, "step": 3094, "task_loss": 1.8391356468200684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49745471137682606, "compression/movement_sparsity/importance_threshold": -0.0035746539712811823, "compression/movement_sparsity/linear_layer_sparsity": 0.42641652195496826, "compression/movement_sparsity/model_sparsity": 0.41176780261515605, "compression_loss": 53.344966888427734, "distillation_loss": 1.0456455945968628, "epoch": 2.62, "learning_rate": 4.1020944867098717e-05, "loss": 54.6603, "step": 3095, "task_loss": 1.1955612897872925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4979883877449186, "compression/movement_sparsity/importance_threshold": -0.0035708578788855947, "compression/movement_sparsity/linear_layer_sparsity": 0.4272000232378179, "compression/movement_sparsity/model_sparsity": 0.41252438821860793, "compression_loss": 53.40183639526367, "distillation_loss": 1.0704319477081299, "epoch": 2.62, "learning_rate": 4.10162487085564e-05, "loss": 54.6551, "step": 3096, "task_loss": 1.0191529989242554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49852168615525916, "compression/movement_sparsity/importance_threshold": -0.0035670644749406, "compression/movement_sparsity/linear_layer_sparsity": 0.42788106014817257, "compression/movement_sparsity/model_sparsity": 0.4131820294159773, "compression_loss": 53.458709716796875, "distillation_loss": 1.576358675956726, "epoch": 2.62, "learning_rate": 4.1011552550014096e-05, "loss": 54.8996, "step": 3097, "task_loss": 1.2849819660186768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49905460674173285, "compression/movement_sparsity/importance_threshold": -0.0035632737584938593, "compression/movement_sparsity/linear_layer_sparsity": 0.4285544655912403, "compression/movement_sparsity/model_sparsity": 0.41383230131043824, "compression_loss": 53.5155029296875, "distillation_loss": 1.355388879776001, "epoch": 2.62, "learning_rate": 4.1006856391471776e-05, "loss": 54.6309, "step": 3098, "task_loss": 1.430747151374817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4995871496382247, "compression/movement_sparsity/importance_threshold": -0.003559485728593037, "compression/movement_sparsity/linear_layer_sparsity": 0.4293909221025608, "compression/movement_sparsity/model_sparsity": 0.4146400229673537, "compression_loss": 53.57227325439453, "distillation_loss": 2.5909767150878906, "epoch": 2.62, "learning_rate": 4.100216023292946e-05, "loss": 55.0672, "step": 3099, "task_loss": 2.2538931369781494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5001193149786202, "compression/movement_sparsity/importance_threshold": -0.0035557003842857913, "compression/movement_sparsity/linear_layer_sparsity": 0.4301664580414297, "compression/movement_sparsity/model_sparsity": 0.4153889168608948, "compression_loss": 53.62897872924805, "distillation_loss": 0.6293074488639832, "epoch": 2.62, "learning_rate": 4.0997464074387155e-05, "loss": 55.0426, "step": 3100, "task_loss": 0.42906421422958374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5006511028968043, "compression/movement_sparsity/importance_threshold": -0.0035519177246197855, "compression/movement_sparsity/linear_layer_sparsity": 0.4308934149213501, "compression/movement_sparsity/model_sparsity": 0.416090900535609, "compression_loss": 53.6856575012207, "distillation_loss": 2.12246036529541, "epoch": 2.62, "learning_rate": 4.099276791584484e-05, "loss": 55.1794, "step": 3101, "task_loss": 1.9462848901748657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5011825135266621, "compression/movement_sparsity/importance_threshold": -0.0035481377486426815, "compression/movement_sparsity/linear_layer_sparsity": 0.4316114286755436, "compression/movement_sparsity/model_sparsity": 0.41678424830847727, "compression_loss": 53.7423095703125, "distillation_loss": 1.925506353378296, "epoch": 2.62, "learning_rate": 4.098807175730253e-05, "loss": 54.9927, "step": 3102, "task_loss": 1.6994175910949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5017135470020788, "compression/movement_sparsity/importance_threshold": -0.003544360455402141, "compression/movement_sparsity/linear_layer_sparsity": 0.43227730996883307, "compression/movement_sparsity/model_sparsity": 0.41742725453085183, "compression_loss": 53.79890441894531, "distillation_loss": 1.2417391538619995, "epoch": 2.62, "learning_rate": 4.0983375598760214e-05, "loss": 55.0226, "step": 3103, "task_loss": 0.497470498085022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5022442034569394, "compression/movement_sparsity/importance_threshold": -0.0035405858439458275, "compression/movement_sparsity/linear_layer_sparsity": 0.4328932886205665, "compression/movement_sparsity/model_sparsity": 0.4180220724209262, "compression_loss": 53.85541534423828, "distillation_loss": 1.5691370964050293, "epoch": 2.62, "learning_rate": 4.097867944021791e-05, "loss": 55.326, "step": 3104, "task_loss": 1.1506019830703735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5027744830251293, "compression/movement_sparsity/importance_threshold": -0.003536813913321401, "compression/movement_sparsity/linear_layer_sparsity": 0.43374975388518, "compression/movement_sparsity/model_sparsity": 0.4188491154689049, "compression_loss": 53.91194534301758, "distillation_loss": 1.2251501083374023, "epoch": 2.62, "learning_rate": 4.0973983281675594e-05, "loss": 55.3893, "step": 3105, "task_loss": 1.2659884691238403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5033043858405335, "compression/movement_sparsity/importance_threshold": -0.003533044662576523, "compression/movement_sparsity/linear_layer_sparsity": 0.4344444558916454, "compression/movement_sparsity/model_sparsity": 0.41951995232429484, "compression_loss": 53.96840286254883, "distillation_loss": 1.2460529804229736, "epoch": 2.63, "learning_rate": 4.096928712313327e-05, "loss": 55.1002, "step": 3106, "task_loss": 0.9062367677688599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5038339120370371, "compression/movement_sparsity/importance_threshold": -0.003529278090758857, "compression/movement_sparsity/linear_layer_sparsity": 0.43496536315481804, "compression/movement_sparsity/model_sparsity": 0.42002296482047935, "compression_loss": 54.02482223510742, "distillation_loss": 1.1969376802444458, "epoch": 2.63, "learning_rate": 4.0964590964590966e-05, "loss": 55.3766, "step": 3107, "task_loss": 0.9070699214935303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5043630617485253, "compression/movement_sparsity/importance_threshold": -0.0035255141969160633, "compression/movement_sparsity/linear_layer_sparsity": 0.43567690208598525, "compression/movement_sparsity/model_sparsity": 0.4207100602004112, "compression_loss": 54.08120346069336, "distillation_loss": 0.8574906587600708, "epoch": 2.63, "learning_rate": 4.095989480604865e-05, "loss": 55.4683, "step": 3108, "task_loss": 0.5444690585136414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5048918351088829, "compression/movement_sparsity/importance_threshold": -0.003521752980095807, "compression/movement_sparsity/linear_layer_sparsity": 0.4363134141543876, "compression/movement_sparsity/model_sparsity": 0.42132470612112377, "compression_loss": 54.137508392333984, "distillation_loss": 1.334888219833374, "epoch": 2.63, "learning_rate": 4.0955198647506346e-05, "loss": 55.5045, "step": 3109, "task_loss": 0.9168862700462341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5054202322519957, "compression/movement_sparsity/importance_threshold": -0.003517994439345745, "compression/movement_sparsity/linear_layer_sparsity": 0.4370095709093046, "compression/movement_sparsity/model_sparsity": 0.42199694774988067, "compression_loss": 54.193824768066406, "distillation_loss": 0.7136270403862, "epoch": 2.63, "learning_rate": 4.095050248896403e-05, "loss": 55.4981, "step": 3110, "task_loss": 0.9455158710479736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5059482533117485, "compression/movement_sparsity/importance_threshold": -0.0035142385737135418, "compression/movement_sparsity/linear_layer_sparsity": 0.4376249533526562, "compression/movement_sparsity/model_sparsity": 0.4225911899131653, "compression_loss": 54.25004959106445, "distillation_loss": 1.2264740467071533, "epoch": 2.63, "learning_rate": 4.094580633042172e-05, "loss": 55.6459, "step": 3111, "task_loss": 0.4249628186225891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5064758984220263, "compression/movement_sparsity/importance_threshold": -0.00351048538224686, "compression/movement_sparsity/linear_layer_sparsity": 0.4384557816568526, "compression/movement_sparsity/model_sparsity": 0.42339347670918576, "compression_loss": 54.3062744140625, "distillation_loss": 1.1593600511550903, "epoch": 2.63, "learning_rate": 4.0941110171879405e-05, "loss": 55.8479, "step": 3112, "task_loss": 1.8123754262924194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5070031677167146, "compression/movement_sparsity/importance_threshold": -0.003506734863993359, "compression/movement_sparsity/linear_layer_sparsity": 0.43908484112048257, "compression/movement_sparsity/model_sparsity": 0.42400092604502676, "compression_loss": 54.36247634887695, "distillation_loss": 1.4451532363891602, "epoch": 2.63, "learning_rate": 4.093641401333709e-05, "loss": 55.5954, "step": 3113, "task_loss": 1.9382447004318237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5075300613296982, "compression/movement_sparsity/importance_threshold": -0.003502987018000703, "compression/movement_sparsity/linear_layer_sparsity": 0.4398847262096639, "compression/movement_sparsity/model_sparsity": 0.4247733326206603, "compression_loss": 54.418601989746094, "distillation_loss": 1.1887192726135254, "epoch": 2.63, "learning_rate": 4.0931717854794784e-05, "loss": 55.3999, "step": 3114, "task_loss": 1.1321805715560913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5080565793948624, "compression/movement_sparsity/importance_threshold": -0.003499241843316553, "compression/movement_sparsity/linear_layer_sparsity": 0.4404615697432164, "compression/movement_sparsity/model_sparsity": 0.42533035980425704, "compression_loss": 54.47467041015625, "distillation_loss": 1.3669301271438599, "epoch": 2.63, "learning_rate": 4.0927021696252464e-05, "loss": 55.7374, "step": 3115, "task_loss": 1.3973315954208374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5085827220460923, "compression/movement_sparsity/importance_threshold": -0.0034954993389885703, "compression/movement_sparsity/linear_layer_sparsity": 0.44119735050718734, "compression/movement_sparsity/model_sparsity": 0.42604086423545917, "compression_loss": 54.530723571777344, "distillation_loss": 1.01876962184906, "epoch": 2.63, "learning_rate": 4.092232553771016e-05, "loss": 55.803, "step": 3116, "task_loss": 1.366461992263794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.509108489417273, "compression/movement_sparsity/importance_threshold": -0.003491759504064418, "compression/movement_sparsity/linear_layer_sparsity": 0.4419622977851956, "compression/movement_sparsity/model_sparsity": 0.42677953322121476, "compression_loss": 54.586700439453125, "distillation_loss": 1.937699317932129, "epoch": 2.63, "learning_rate": 4.091762937916784e-05, "loss": 56.0356, "step": 3117, "task_loss": 2.324974775314331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5096338816422896, "compression/movement_sparsity/importance_threshold": -0.003488022337591758, "compression/movement_sparsity/linear_layer_sparsity": 0.4426568447774817, "compression/movement_sparsity/model_sparsity": 0.42745022038763936, "compression_loss": 54.642662048339844, "distillation_loss": 1.9630446434020996, "epoch": 2.64, "learning_rate": 4.091293322062553e-05, "loss": 56.6872, "step": 3118, "task_loss": 1.8795651197433472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5101588988550273, "compression/movement_sparsity/importance_threshold": -0.0034842878386182516, "compression/movement_sparsity/linear_layer_sparsity": 0.44341858445439586, "compression/movement_sparsity/model_sparsity": 0.4281857919632662, "compression_loss": 54.69858169555664, "distillation_loss": 1.0942838191986084, "epoch": 2.64, "learning_rate": 4.0908237062083216e-05, "loss": 55.7813, "step": 3119, "task_loss": 0.22704675793647766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5106835411893714, "compression/movement_sparsity/importance_threshold": -0.003480556006191559, "compression/movement_sparsity/linear_layer_sparsity": 0.444203707424044, "compression/movement_sparsity/model_sparsity": 0.42894394354358617, "compression_loss": 54.75447082519531, "distillation_loss": 1.0614807605743408, "epoch": 2.64, "learning_rate": 4.09035409035409e-05, "loss": 56.0027, "step": 3120, "task_loss": 0.20049752295017242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5112078087792069, "compression/movement_sparsity/importance_threshold": -0.0034768268393593443, "compression/movement_sparsity/linear_layer_sparsity": 0.44481764704311166, "compression/movement_sparsity/model_sparsity": 0.4295367924480397, "compression_loss": 54.81031036376953, "distillation_loss": 1.6732330322265625, "epoch": 2.64, "learning_rate": 4.0898844744998595e-05, "loss": 56.084, "step": 3121, "task_loss": 0.8696632981300354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.511731701758419, "compression/movement_sparsity/importance_threshold": -0.003473100337169268, "compression/movement_sparsity/linear_layer_sparsity": 0.44546514126990666, "compression/movement_sparsity/model_sparsity": 0.43016204325621904, "compression_loss": 54.86609649658203, "distillation_loss": 2.382519245147705, "epoch": 2.64, "learning_rate": 4.089414858645628e-05, "loss": 56.6861, "step": 3122, "task_loss": 0.5684500336647034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5122552202608928, "compression/movement_sparsity/importance_threshold": -0.003469376498668992, "compression/movement_sparsity/linear_layer_sparsity": 0.44611098996156795, "compression/movement_sparsity/model_sparsity": 0.43078570505845876, "compression_loss": 54.921897888183594, "distillation_loss": 2.273153066635132, "epoch": 2.64, "learning_rate": 4.088945242791397e-05, "loss": 56.6101, "step": 3123, "task_loss": 1.6830525398254395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5127783644205133, "compression/movement_sparsity/importance_threshold": -0.00346565532290618, "compression/movement_sparsity/linear_layer_sparsity": 0.44689767499717636, "compression/movement_sparsity/model_sparsity": 0.4315453650429678, "compression_loss": 54.977630615234375, "distillation_loss": 1.2914538383483887, "epoch": 2.64, "learning_rate": 4.0884756269371654e-05, "loss": 56.1616, "step": 3124, "task_loss": 0.37520796060562134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5133011343711658, "compression/movement_sparsity/importance_threshold": -0.0034619368089284924, "compression/movement_sparsity/linear_layer_sparsity": 0.4475468028349375, "compression/movement_sparsity/model_sparsity": 0.432172193342551, "compression_loss": 55.03335189819336, "distillation_loss": 1.001711130142212, "epoch": 2.64, "learning_rate": 4.088006011082934e-05, "loss": 56.2355, "step": 3125, "task_loss": 0.5179257392883301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5138235302467355, "compression/movement_sparsity/importance_threshold": -0.0034582209557835896, "compression/movement_sparsity/linear_layer_sparsity": 0.4481399944023187, "compression/movement_sparsity/model_sparsity": 0.43274500695472207, "compression_loss": 55.08899688720703, "distillation_loss": 1.002976417541504, "epoch": 2.64, "learning_rate": 4.0875363952287034e-05, "loss": 56.593, "step": 3126, "task_loss": 0.7511088252067566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5143455521811073, "compression/movement_sparsity/importance_threshold": -0.003454507762519137, "compression/movement_sparsity/linear_layer_sparsity": 0.448787643643293, "compression/movement_sparsity/model_sparsity": 0.4333704074518667, "compression_loss": 55.14460372924805, "distillation_loss": 1.317234992980957, "epoch": 2.64, "learning_rate": 4.087066779374472e-05, "loss": 56.2474, "step": 3127, "task_loss": 0.7483881711959839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5148672003081667, "compression/movement_sparsity/importance_threshold": -0.003450797228182793, "compression/movement_sparsity/linear_layer_sparsity": 0.4494537514957676, "compression/movement_sparsity/model_sparsity": 0.43401363245042135, "compression_loss": 55.200164794921875, "distillation_loss": 1.8707051277160645, "epoch": 2.64, "learning_rate": 4.0865971635202406e-05, "loss": 56.9532, "step": 3128, "task_loss": 1.2911224365234375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5153884747617983, "compression/movement_sparsity/importance_threshold": -0.0034470893518222232, "compression/movement_sparsity/linear_layer_sparsity": 0.450058628747432, "compression/movement_sparsity/model_sparsity": 0.43459773030767107, "compression_loss": 55.25568771362305, "distillation_loss": 0.7611358165740967, "epoch": 2.64, "learning_rate": 4.086127547666009e-05, "loss": 56.444, "step": 3129, "task_loss": 0.8175849318504333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5159093756758878, "compression/movement_sparsity/importance_threshold": -0.0034433841324850857, "compression/movement_sparsity/linear_layer_sparsity": 0.45077839535426795, "compression/movement_sparsity/model_sparsity": 0.4352927707173011, "compression_loss": 55.311180114746094, "distillation_loss": 1.529210090637207, "epoch": 2.65, "learning_rate": 4.0856579318117786e-05, "loss": 56.7764, "step": 3130, "task_loss": 1.3315836191177368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5164299031843201, "compression/movement_sparsity/importance_threshold": -0.0034396815692190434, "compression/movement_sparsity/linear_layer_sparsity": 0.4514484143337271, "compression/movement_sparsity/model_sparsity": 0.4359397724835964, "compression_loss": 55.36661148071289, "distillation_loss": 1.5472978353500366, "epoch": 2.65, "learning_rate": 4.085188315957547e-05, "loss": 56.8275, "step": 3131, "task_loss": 1.1783679723739624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5169500574209804, "compression/movement_sparsity/importance_threshold": -0.0034359816610717584, "compression/movement_sparsity/linear_layer_sparsity": 0.45206529922220084, "compression/movement_sparsity/model_sparsity": 0.4365354654783911, "compression_loss": 55.42203140258789, "distillation_loss": 1.3587344884872437, "epoch": 2.65, "learning_rate": 4.084718700103315e-05, "loss": 56.8144, "step": 3132, "task_loss": 1.0638813972473145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5174698385197538, "compression/movement_sparsity/importance_threshold": -0.003432284407090893, "compression/movement_sparsity/linear_layer_sparsity": 0.45272952305618896, "compression/movement_sparsity/model_sparsity": 0.43717687118029025, "compression_loss": 55.47739028930664, "distillation_loss": 1.0647530555725098, "epoch": 2.65, "learning_rate": 4.0842490842490845e-05, "loss": 56.8014, "step": 3133, "task_loss": 0.8790706992149353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5179892466145253, "compression/movement_sparsity/importance_threshold": -0.00342858980632411, "compression/movement_sparsity/linear_layer_sparsity": 0.4533965848420744, "compression/movement_sparsity/model_sparsity": 0.4378210173417084, "compression_loss": 55.53273010253906, "distillation_loss": 1.3509814739227295, "epoch": 2.65, "learning_rate": 4.083779468394853e-05, "loss": 57.4055, "step": 3134, "task_loss": 1.1022142171859741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5185082818391804, "compression/movement_sparsity/importance_threshold": -0.003424897857819068, "compression/movement_sparsity/linear_layer_sparsity": 0.4541401878600145, "compression/movement_sparsity/model_sparsity": 0.4385390753083918, "compression_loss": 55.5880241394043, "distillation_loss": 0.8181122541427612, "epoch": 2.65, "learning_rate": 4.0833098525406224e-05, "loss": 57.3742, "step": 3135, "task_loss": 1.1470204591751099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5190269443276039, "compression/movement_sparsity/importance_threshold": -0.003421208560623432, "compression/movement_sparsity/linear_layer_sparsity": 0.45490058010598583, "compression/movement_sparsity/model_sparsity": 0.43927334574147386, "compression_loss": 55.64328384399414, "distillation_loss": 1.0207005739212036, "epoch": 2.65, "learning_rate": 4.0828402366863904e-05, "loss": 57.0131, "step": 3136, "task_loss": 1.6122665405273438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.519545234213681, "compression/movement_sparsity/importance_threshold": -0.0034175219137848626, "compression/movement_sparsity/linear_layer_sparsity": 0.4556642395738894, "compression/movement_sparsity/model_sparsity": 0.44001077115736364, "compression_loss": 55.698482513427734, "distillation_loss": 1.1060789823532104, "epoch": 2.65, "learning_rate": 4.08237062083216e-05, "loss": 56.8665, "step": 3137, "task_loss": 1.1876758337020874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5200631516312969, "compression/movement_sparsity/importance_threshold": -0.0034138379163510223, "compression/movement_sparsity/linear_layer_sparsity": 0.45631681349609726, "compression/movement_sparsity/model_sparsity": 0.44064092715779146, "compression_loss": 55.75369644165039, "distillation_loss": 1.0509774684906006, "epoch": 2.65, "learning_rate": 4.081901004977928e-05, "loss": 57.1325, "step": 3138, "task_loss": 1.0492792129516602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5205806967143366, "compression/movement_sparsity/importance_threshold": -0.0034101565673695733, "compression/movement_sparsity/linear_layer_sparsity": 0.4569814069792821, "compression/movement_sparsity/model_sparsity": 0.4412826898103002, "compression_loss": 55.80885314941406, "distillation_loss": 1.3626699447631836, "epoch": 2.65, "learning_rate": 4.081431389123697e-05, "loss": 57.277, "step": 3139, "task_loss": 1.004623293876648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5210978695966855, "compression/movement_sparsity/importance_threshold": -0.003406477865888176, "compression/movement_sparsity/linear_layer_sparsity": 0.4576851236564802, "compression/movement_sparsity/model_sparsity": 0.44196223165475085, "compression_loss": 55.863948822021484, "distillation_loss": 1.1226223707199097, "epoch": 2.65, "learning_rate": 4.080961773269466e-05, "loss": 57.1498, "step": 3140, "task_loss": 1.755551815032959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5216146704122288, "compression/movement_sparsity/importance_threshold": -0.0034028018109544915, "compression/movement_sparsity/linear_layer_sparsity": 0.4583693562437613, "compression/movement_sparsity/model_sparsity": 0.4426229587477132, "compression_loss": 55.91901397705078, "distillation_loss": 1.1340712308883667, "epoch": 2.65, "learning_rate": 4.080492157415234e-05, "loss": 57.2685, "step": 3141, "task_loss": 0.9657101631164551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5221310992948514, "compression/movement_sparsity/importance_threshold": -0.0033991284016161834, "compression/movement_sparsity/linear_layer_sparsity": 0.45903336544273193, "compression/movement_sparsity/model_sparsity": 0.44326415718796797, "compression_loss": 55.97404479980469, "distillation_loss": 1.0440092086791992, "epoch": 2.66, "learning_rate": 4.0800225415610036e-05, "loss": 57.0859, "step": 3142, "task_loss": 0.4690335988998413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5226471563784383, "compression/movement_sparsity/importance_threshold": -0.003395457636920914, "compression/movement_sparsity/linear_layer_sparsity": 0.4596867621325067, "compression/movement_sparsity/model_sparsity": 0.44389510769136564, "compression_loss": 56.02901077270508, "distillation_loss": 0.794431209564209, "epoch": 2.66, "learning_rate": 4.079552925706772e-05, "loss": 57.3883, "step": 3143, "task_loss": 1.544986605644226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.523162841796875, "compression/movement_sparsity/importance_threshold": -0.0033917895159163436, "compression/movement_sparsity/linear_layer_sparsity": 0.4604550004807824, "compression/movement_sparsity/model_sparsity": 0.4446369546890005, "compression_loss": 56.08394241333008, "distillation_loss": 2.008721113204956, "epoch": 2.66, "learning_rate": 4.079083309852541e-05, "loss": 57.8064, "step": 3144, "task_loss": 1.0668280124664307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5236781556840464, "compression/movement_sparsity/importance_threshold": -0.0033881240376501353, "compression/movement_sparsity/linear_layer_sparsity": 0.4611392211438959, "compression/movement_sparsity/model_sparsity": 0.44529767026742706, "compression_loss": 56.13884735107422, "distillation_loss": 1.4892604351043701, "epoch": 2.66, "learning_rate": 4.0786136939983095e-05, "loss": 57.5771, "step": 3145, "task_loss": 2.2988531589508057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5241930981738379, "compression/movement_sparsity/importance_threshold": -0.0033844612011699497, "compression/movement_sparsity/linear_layer_sparsity": 0.46185117742093035, "compression/movement_sparsity/model_sparsity": 0.4459851686561117, "compression_loss": 56.193702697753906, "distillation_loss": 1.6211931705474854, "epoch": 2.66, "learning_rate": 4.078144078144078e-05, "loss": 58.0616, "step": 3146, "task_loss": 0.8327621221542358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5247076694001342, "compression/movement_sparsity/importance_threshold": -0.00338080100552345, "compression/movement_sparsity/linear_layer_sparsity": 0.4625184657660009, "compression/movement_sparsity/model_sparsity": 0.44662953359371, "compression_loss": 56.24851989746094, "distillation_loss": 1.4751760959625244, "epoch": 2.66, "learning_rate": 4.0776744622898474e-05, "loss": 57.3901, "step": 3147, "task_loss": 1.5031238794326782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5252218694968209, "compression/movement_sparsity/importance_threshold": -0.003377143449758297, "compression/movement_sparsity/linear_layer_sparsity": 0.4632617945280853, "compression/movement_sparsity/model_sparsity": 0.4473473267260701, "compression_loss": 56.30329895019531, "distillation_loss": 1.260608434677124, "epoch": 2.66, "learning_rate": 4.077204846435616e-05, "loss": 58.0884, "step": 3148, "task_loss": 1.159328579902649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.525735698597783, "compression/movement_sparsity/importance_threshold": -0.003373488532922152, "compression/movement_sparsity/linear_layer_sparsity": 0.4639672521337583, "compression/movement_sparsity/model_sparsity": 0.4480285496927467, "compression_loss": 56.35805130004883, "distillation_loss": 1.700018048286438, "epoch": 2.66, "learning_rate": 4.076735230581385e-05, "loss": 57.7866, "step": 3149, "task_loss": 1.1688836812973022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5262491568369052, "compression/movement_sparsity/importance_threshold": -0.0033698362540626812, "compression/movement_sparsity/linear_layer_sparsity": 0.46465780452988636, "compression/movement_sparsity/model_sparsity": 0.44869537948968014, "compression_loss": 56.41275405883789, "distillation_loss": 1.0559065341949463, "epoch": 2.66, "learning_rate": 4.076265614727153e-05, "loss": 57.9698, "step": 3150, "task_loss": 0.8785192966461182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5267622443480733, "compression/movement_sparsity/importance_threshold": -0.003366186612227541, "compression/movement_sparsity/linear_layer_sparsity": 0.46531907117030075, "compression/movement_sparsity/model_sparsity": 0.4493339295867022, "compression_loss": 56.46744918823242, "distillation_loss": 1.7425692081451416, "epoch": 2.66, "learning_rate": 4.075795998872922e-05, "loss": 58.0402, "step": 3151, "task_loss": 1.744755744934082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5272749612651723, "compression/movement_sparsity/importance_threshold": -0.0033625396064643947, "compression/movement_sparsity/linear_layer_sparsity": 0.46594643740212643, "compression/movement_sparsity/model_sparsity": 0.44993974385846036, "compression_loss": 56.522098541259766, "distillation_loss": 2.3773319721221924, "epoch": 2.66, "learning_rate": 4.075326383018691e-05, "loss": 58.4912, "step": 3152, "task_loss": 1.409574031829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.527787307722087, "compression/movement_sparsity/importance_threshold": -0.003358895235820905, "compression/movement_sparsity/linear_layer_sparsity": 0.4666364055140404, "compression/movement_sparsity/model_sparsity": 0.4506060094431399, "compression_loss": 56.57669448852539, "distillation_loss": 0.8205479383468628, "epoch": 2.66, "learning_rate": 4.074856767164459e-05, "loss": 57.7765, "step": 3153, "task_loss": 0.4382261335849762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5282992838527029, "compression/movement_sparsity/importance_threshold": -0.003355253499344734, "compression/movement_sparsity/linear_layer_sparsity": 0.46721553848777897, "compression/movement_sparsity/model_sparsity": 0.45116524741760916, "compression_loss": 56.63125991821289, "distillation_loss": 1.2338604927062988, "epoch": 2.67, "learning_rate": 4.0743871513102285e-05, "loss": 58.0079, "step": 3154, "task_loss": 1.4214242696762085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5288108897909048, "compression/movement_sparsity/importance_threshold": -0.003351614396083543, "compression/movement_sparsity/linear_layer_sparsity": 0.467888967779182, "compression/movement_sparsity/model_sparsity": 0.45181554234114163, "compression_loss": 56.685768127441406, "distillation_loss": 1.1608073711395264, "epoch": 2.67, "learning_rate": 4.073917535455997e-05, "loss": 57.9021, "step": 3155, "task_loss": 1.4934239387512207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5293221256705782, "compression/movement_sparsity/importance_threshold": -0.0033479779250849932, "compression/movement_sparsity/linear_layer_sparsity": 0.46869105883937595, "compression/movement_sparsity/model_sparsity": 0.4525900791058972, "compression_loss": 56.740230560302734, "distillation_loss": 1.6131956577301025, "epoch": 2.67, "learning_rate": 4.073447919601766e-05, "loss": 58.4192, "step": 3156, "task_loss": 1.6098885536193848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.529832991625608, "compression/movement_sparsity/importance_threshold": -0.003344344085396748, "compression/movement_sparsity/linear_layer_sparsity": 0.4694692180951247, "compression/movement_sparsity/model_sparsity": 0.45334150619731317, "compression_loss": 56.79468536376953, "distillation_loss": 2.2845754623413086, "epoch": 2.67, "learning_rate": 4.072978303747535e-05, "loss": 58.3888, "step": 3157, "task_loss": 1.0710558891296387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5303434877898795, "compression/movement_sparsity/importance_threshold": -0.0033407128760664677, "compression/movement_sparsity/linear_layer_sparsity": 0.4701401790838271, "compression/movement_sparsity/model_sparsity": 0.4539894176119362, "compression_loss": 56.84900665283203, "distillation_loss": 1.6032233238220215, "epoch": 2.67, "learning_rate": 4.072508687893303e-05, "loss": 58.3302, "step": 3158, "task_loss": 1.6968669891357422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5308536142972777, "compression/movement_sparsity/importance_threshold": -0.0033370842961418148, "compression/movement_sparsity/linear_layer_sparsity": 0.4707618455635228, "compression/movement_sparsity/model_sparsity": 0.45458972793558455, "compression_loss": 56.90334701538086, "distillation_loss": 1.6694166660308838, "epoch": 2.67, "learning_rate": 4.0720390720390724e-05, "loss": 58.377, "step": 3159, "task_loss": 1.675429344177246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5313633712816875, "compression/movement_sparsity/importance_threshold": -0.0033334583446704537, "compression/movement_sparsity/linear_layer_sparsity": 0.47134342299162674, "compression/movement_sparsity/model_sparsity": 0.4551513263898917, "compression_loss": 56.95762252807617, "distillation_loss": 1.6153185367584229, "epoch": 2.67, "learning_rate": 4.071569456184841e-05, "loss": 58.6654, "step": 3160, "task_loss": 1.6935064792633057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5318727588769947, "compression/movement_sparsity/importance_threshold": -0.0033298350207000412, "compression/movement_sparsity/linear_layer_sparsity": 0.4720277867447518, "compression/movement_sparsity/model_sparsity": 0.45581218014274777, "compression_loss": 57.01188278198242, "distillation_loss": 1.6574807167053223, "epoch": 2.67, "learning_rate": 4.07109984033061e-05, "loss": 58.377, "step": 3161, "task_loss": 1.3766021728515625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5323817772170839, "compression/movement_sparsity/importance_threshold": -0.003326214323278243, "compression/movement_sparsity/linear_layer_sparsity": 0.47275485094218095, "compression/movement_sparsity/model_sparsity": 0.4565142674482841, "compression_loss": 57.06608200073242, "distillation_loss": 1.1291615962982178, "epoch": 2.67, "learning_rate": 4.070630224476378e-05, "loss": 58.4937, "step": 3162, "task_loss": 0.43847355246543884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5328904264358405, "compression/movement_sparsity/importance_threshold": -0.003322596251452719, "compression/movement_sparsity/linear_layer_sparsity": 0.4733886443003623, "compression/movement_sparsity/model_sparsity": 0.4571262880548355, "compression_loss": 57.120262145996094, "distillation_loss": 0.9550775289535522, "epoch": 2.67, "learning_rate": 4.070160608622147e-05, "loss": 58.3618, "step": 3163, "task_loss": 1.656665325164795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5333987066671495, "compression/movement_sparsity/importance_threshold": -0.003318980804271132, "compression/movement_sparsity/linear_layer_sparsity": 0.47400591076220044, "compression/movement_sparsity/model_sparsity": 0.45772234951477575, "compression_loss": 57.17440414428711, "distillation_loss": 2.3190383911132812, "epoch": 2.67, "learning_rate": 4.069690992767916e-05, "loss": 58.6096, "step": 3164, "task_loss": 2.5577399730682373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5339066180448961, "compression/movement_sparsity/importance_threshold": -0.0033153679807811434, "compression/movement_sparsity/linear_layer_sparsity": 0.4745925201890467, "compression/movement_sparsity/model_sparsity": 0.45828880710318814, "compression_loss": 57.228485107421875, "distillation_loss": 1.4734718799591064, "epoch": 2.67, "learning_rate": 4.069221376913685e-05, "loss": 58.5868, "step": 3165, "task_loss": 1.3831501007080078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5344141607029655, "compression/movement_sparsity/importance_threshold": -0.003311757780030415, "compression/movement_sparsity/linear_layer_sparsity": 0.4750966501483557, "compression/movement_sparsity/model_sparsity": 0.45877561864750976, "compression_loss": 57.282527923583984, "distillation_loss": 1.2893824577331543, "epoch": 2.68, "learning_rate": 4.0687517610594535e-05, "loss": 58.4284, "step": 3166, "task_loss": 1.558672308921814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5349213347752428, "compression/movement_sparsity/importance_threshold": -0.003308150201066609, "compression/movement_sparsity/linear_layer_sparsity": 0.4758768365126026, "compression/movement_sparsity/model_sparsity": 0.4595290032100108, "compression_loss": 57.33654022216797, "distillation_loss": 2.640058994293213, "epoch": 2.68, "learning_rate": 4.068282145205222e-05, "loss": 58.9094, "step": 3167, "task_loss": 2.182898759841919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.535428140395613, "compression/movement_sparsity/importance_threshold": -0.0033045452429373875, "compression/movement_sparsity/linear_layer_sparsity": 0.476504572393625, "compression/movement_sparsity/model_sparsity": 0.46013517443237856, "compression_loss": 57.39051055908203, "distillation_loss": 2.10050106048584, "epoch": 2.68, "learning_rate": 4.0678125293509914e-05, "loss": 59.1111, "step": 3168, "task_loss": 0.9084132313728333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5359345776979615, "compression/movement_sparsity/importance_threshold": -0.0033009429046904114, "compression/movement_sparsity/linear_layer_sparsity": 0.4772714394626226, "compression/movement_sparsity/model_sparsity": 0.4608756972583971, "compression_loss": 57.4444580078125, "distillation_loss": 1.4045181274414062, "epoch": 2.68, "learning_rate": 4.06734291349676e-05, "loss": 58.7435, "step": 3169, "task_loss": 1.5369977951049805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.536440646816173, "compression/movement_sparsity/importance_threshold": -0.0032973431853733455, "compression/movement_sparsity/linear_layer_sparsity": 0.4779406356745149, "compression/movement_sparsity/model_sparsity": 0.46152190452172254, "compression_loss": 57.49834060668945, "distillation_loss": 1.0421686172485352, "epoch": 2.68, "learning_rate": 4.066873297642528e-05, "loss": 58.658, "step": 3170, "task_loss": 0.7062011361122131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5369463478841332, "compression/movement_sparsity/importance_threshold": -0.003293746084033847, "compression/movement_sparsity/linear_layer_sparsity": 0.47870554718002023, "compression/movement_sparsity/model_sparsity": 0.4622605389638707, "compression_loss": 57.552146911621094, "distillation_loss": 1.4098498821258545, "epoch": 2.68, "learning_rate": 4.066403681788297e-05, "loss": 58.7766, "step": 3171, "task_loss": 1.773332118988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5374516810357268, "compression/movement_sparsity/importance_threshold": -0.003290151599719581, "compression/movement_sparsity/linear_layer_sparsity": 0.47947222346233564, "compression/movement_sparsity/model_sparsity": 0.4630008775573165, "compression_loss": 57.605926513671875, "distillation_loss": 0.6040321588516235, "epoch": 2.68, "learning_rate": 4.065934065934066e-05, "loss": 58.9032, "step": 3172, "task_loss": 0.25154149532318115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5379566464048393, "compression/movement_sparsity/importance_threshold": -0.0032865597314782075, "compression/movement_sparsity/linear_layer_sparsity": 0.4801798870390212, "compression/movement_sparsity/model_sparsity": 0.4636842307131151, "compression_loss": 57.65963363647461, "distillation_loss": 1.4139490127563477, "epoch": 2.68, "learning_rate": 4.065464450079835e-05, "loss": 59.1895, "step": 3173, "task_loss": 0.5305821299552917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5384612441253557, "compression/movement_sparsity/importance_threshold": -0.0032829704783573894, "compression/movement_sparsity/linear_layer_sparsity": 0.4808624979395038, "compression/movement_sparsity/model_sparsity": 0.4643433918292094, "compression_loss": 57.713348388671875, "distillation_loss": 0.9121890068054199, "epoch": 2.68, "learning_rate": 4.064994834225604e-05, "loss": 58.8341, "step": 3174, "task_loss": 0.7723836302757263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5389654743311609, "compression/movement_sparsity/importance_threshold": -0.0032793838394047886, "compression/movement_sparsity/linear_layer_sparsity": 0.4815816087171198, "compression/movement_sparsity/model_sparsity": 0.46503779893937075, "compression_loss": 57.766963958740234, "distillation_loss": 1.8119970560073853, "epoch": 2.68, "learning_rate": 4.0645252183713725e-05, "loss": 59.4991, "step": 3175, "task_loss": 1.0688424110412598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5394693371561404, "compression/movement_sparsity/importance_threshold": -0.0032757998136680668, "compression/movement_sparsity/linear_layer_sparsity": 0.48224883744135216, "compression/movement_sparsity/model_sparsity": 0.4656821063042901, "compression_loss": 57.8205680847168, "distillation_loss": 1.6893210411071777, "epoch": 2.68, "learning_rate": 4.064055602517141e-05, "loss": 59.3051, "step": 3176, "task_loss": 1.5938782691955566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5399728327341792, "compression/movement_sparsity/importance_threshold": -0.0032722184001948854, "compression/movement_sparsity/linear_layer_sparsity": 0.4831042533792137, "compression/movement_sparsity/model_sparsity": 0.46650813607311886, "compression_loss": 57.8741340637207, "distillation_loss": 2.019838571548462, "epoch": 2.69, "learning_rate": 4.06358598666291e-05, "loss": 59.606, "step": 3177, "task_loss": 1.0248773097991943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5404759611991623, "compression/movement_sparsity/importance_threshold": -0.0032686395980329073, "compression/movement_sparsity/linear_layer_sparsity": 0.483801054039183, "compression/movement_sparsity/model_sparsity": 0.46718099948680863, "compression_loss": 57.92764663696289, "distillation_loss": 2.5831573009490967, "epoch": 2.69, "learning_rate": 4.063116370808679e-05, "loss": 59.7297, "step": 3178, "task_loss": 2.1144649982452393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.540978722684975, "compression/movement_sparsity/importance_threshold": -0.0032650634062297936, "compression/movement_sparsity/linear_layer_sparsity": 0.48449765198830247, "compression/movement_sparsity/model_sparsity": 0.46785366715338994, "compression_loss": 57.98114776611328, "distillation_loss": 1.9458868503570557, "epoch": 2.69, "learning_rate": 4.062646754954447e-05, "loss": 59.3554, "step": 3179, "task_loss": 1.7755376100540161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5414811173255023, "compression/movement_sparsity/importance_threshold": -0.003261489823833207, "compression/movement_sparsity/linear_layer_sparsity": 0.4851180187337259, "compression/movement_sparsity/model_sparsity": 0.4684527223926367, "compression_loss": 58.03459167480469, "distillation_loss": 1.7166094779968262, "epoch": 2.69, "learning_rate": 4.0621771391002164e-05, "loss": 59.4695, "step": 3180, "task_loss": 1.4208043813705444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5419831452546295, "compression/movement_sparsity/importance_threshold": -0.003257918849890808, "compression/movement_sparsity/linear_layer_sparsity": 0.4859344664917534, "compression/movement_sparsity/model_sparsity": 0.46924112265848894, "compression_loss": 58.087982177734375, "distillation_loss": 1.4935717582702637, "epoch": 2.69, "learning_rate": 4.061707523245985e-05, "loss": 59.6981, "step": 3181, "task_loss": 0.2886470854282379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5424848066062418, "compression/movement_sparsity/importance_threshold": -0.003254350483450259, "compression/movement_sparsity/linear_layer_sparsity": 0.48674176841320427, "compression/movement_sparsity/model_sparsity": 0.4700206912753867, "compression_loss": 58.14138412475586, "distillation_loss": 1.4131081104278564, "epoch": 2.69, "learning_rate": 4.0612379073917537e-05, "loss": 59.702, "step": 3182, "task_loss": 1.913451910018921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5429861015142242, "compression/movement_sparsity/importance_threshold": -0.0032507847235592213, "compression/movement_sparsity/linear_layer_sparsity": 0.48749414761852433, "compression/movement_sparsity/model_sparsity": 0.47074722394041485, "compression_loss": 58.194698333740234, "distillation_loss": 1.8330296277999878, "epoch": 2.69, "learning_rate": 4.060768291537522e-05, "loss": 59.5161, "step": 3183, "task_loss": 1.068109393119812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5434870301124619, "compression/movement_sparsity/importance_threshold": -0.0032472215692653577, "compression/movement_sparsity/linear_layer_sparsity": 0.4883033335584616, "compression/movement_sparsity/model_sparsity": 0.4715286118539682, "compression_loss": 58.247989654541016, "distillation_loss": 1.4911723136901855, "epoch": 2.69, "learning_rate": 4.060298675683291e-05, "loss": 59.5441, "step": 3184, "task_loss": 1.2152122259140015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5439875925348399, "compression/movement_sparsity/importance_threshold": -0.0032436610196163304, "compression/movement_sparsity/linear_layer_sparsity": 0.4891277228121346, "compression/movement_sparsity/model_sparsity": 0.47232468080065954, "compression_loss": 58.301231384277344, "distillation_loss": 1.8139543533325195, "epoch": 2.69, "learning_rate": 4.05982905982906e-05, "loss": 59.7615, "step": 3185, "task_loss": 1.7313438653945923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5444877889152435, "compression/movement_sparsity/importance_threshold": -0.0032401030736598, "compression/movement_sparsity/linear_layer_sparsity": 0.4898240107328956, "compression/movement_sparsity/model_sparsity": 0.4729970490893102, "compression_loss": 58.35443878173828, "distillation_loss": 1.4928539991378784, "epoch": 2.69, "learning_rate": 4.059359443974829e-05, "loss": 59.9408, "step": 3186, "task_loss": 1.249882698059082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5449876193875578, "compression/movement_sparsity/importance_threshold": -0.0032365477304434286, "compression/movement_sparsity/linear_layer_sparsity": 0.4906472314181403, "compression/movement_sparsity/model_sparsity": 0.4737919896114937, "compression_loss": 58.40763854980469, "distillation_loss": 2.6634926795959473, "epoch": 2.69, "learning_rate": 4.0588898281205975e-05, "loss": 60.4019, "step": 3187, "task_loss": 1.093407392501831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5454870840856679, "compression/movement_sparsity/importance_threshold": -0.003232994989014879, "compression/movement_sparsity/linear_layer_sparsity": 0.49137716933996967, "compression/movement_sparsity/model_sparsity": 0.4744968519201565, "compression_loss": 58.46076583862305, "distillation_loss": 1.942744493484497, "epoch": 2.69, "learning_rate": 4.058420212266366e-05, "loss": 60.3109, "step": 3188, "task_loss": 2.0792481899261475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.545986183143459, "compression/movement_sparsity/importance_threshold": -0.0032294448484218117, "compression/movement_sparsity/linear_layer_sparsity": 0.49188228900519243, "compression/movement_sparsity/model_sparsity": 0.47498461917094903, "compression_loss": 58.51389694213867, "distillation_loss": 1.8032346963882446, "epoch": 2.7, "learning_rate": 4.057950596412135e-05, "loss": 60.7206, "step": 3189, "task_loss": 1.3808916807174683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.546484916694816, "compression/movement_sparsity/importance_threshold": -0.0032258973077118913, "compression/movement_sparsity/linear_layer_sparsity": 0.4927252441878745, "compression/movement_sparsity/model_sparsity": 0.4757986162498725, "compression_loss": 58.566951751708984, "distillation_loss": 2.260136604309082, "epoch": 2.7, "learning_rate": 4.057480980557904e-05, "loss": 60.4217, "step": 3190, "task_loss": 1.4605259895324707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5469832848736245, "compression/movement_sparsity/importance_threshold": -0.003222352365932776, "compression/movement_sparsity/linear_layer_sparsity": 0.49338854986095465, "compression/movement_sparsity/model_sparsity": 0.4764391353325154, "compression_loss": 58.6200065612793, "distillation_loss": 1.1866872310638428, "epoch": 2.7, "learning_rate": 4.057011364703673e-05, "loss": 60.1206, "step": 3191, "task_loss": 0.37923866510391235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5474812878137694, "compression/movement_sparsity/importance_threshold": -0.0032188100221321294, "compression/movement_sparsity/linear_layer_sparsity": 0.494095664925929, "compression/movement_sparsity/model_sparsity": 0.4771219588196675, "compression_loss": 58.67300796508789, "distillation_loss": 1.4412528276443481, "epoch": 2.7, "learning_rate": 4.0565417488494413e-05, "loss": 60.2655, "step": 3192, "task_loss": 0.7832079529762268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5479789256491356, "compression/movement_sparsity/importance_threshold": -0.003215270275357614, "compression/movement_sparsity/linear_layer_sparsity": 0.49482152478242686, "compression/movement_sparsity/model_sparsity": 0.4778228831570886, "compression_loss": 58.725982666015625, "distillation_loss": 0.9001073837280273, "epoch": 2.7, "learning_rate": 4.05607213299521e-05, "loss": 60.2552, "step": 3193, "task_loss": 1.5464485883712769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5484761985136087, "compression/movement_sparsity/importance_threshold": -0.0032117331246568894, "compression/movement_sparsity/linear_layer_sparsity": 0.49549640882228146, "compression/movement_sparsity/model_sparsity": 0.47847458285398803, "compression_loss": 58.778907775878906, "distillation_loss": 0.9528909921646118, "epoch": 2.7, "learning_rate": 4.0556025171409786e-05, "loss": 60.4101, "step": 3194, "task_loss": 0.9075307846069336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5489731065410736, "compression/movement_sparsity/importance_threshold": -0.00320819856907762, "compression/movement_sparsity/linear_layer_sparsity": 0.4961910989045792, "compression/movement_sparsity/model_sparsity": 0.47914540819484214, "compression_loss": 58.831748962402344, "distillation_loss": 1.7657103538513184, "epoch": 2.7, "learning_rate": 4.055132901286748e-05, "loss": 60.5869, "step": 3195, "task_loss": 1.064477562904358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5494696498654155, "compression/movement_sparsity/importance_threshold": -0.003204666607667466, "compression/movement_sparsity/linear_layer_sparsity": 0.49693155394226335, "compression/movement_sparsity/model_sparsity": 0.4798604263240758, "compression_loss": 58.884586334228516, "distillation_loss": 1.3121531009674072, "epoch": 2.7, "learning_rate": 4.054663285432516e-05, "loss": 60.1378, "step": 3196, "task_loss": 0.32158011198043823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5499658286205193, "compression/movement_sparsity/importance_threshold": -0.0032011372394740896, "compression/movement_sparsity/linear_layer_sparsity": 0.4976613368499135, "compression/movement_sparsity/model_sparsity": 0.48056513894377323, "compression_loss": 58.93739318847656, "distillation_loss": 1.3147743940353394, "epoch": 2.7, "learning_rate": 4.054193669578285e-05, "loss": 60.3153, "step": 3197, "task_loss": 0.40439265966415405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5504616429402704, "compression/movement_sparsity/importance_threshold": -0.0031976104635451546, "compression/movement_sparsity/linear_layer_sparsity": 0.4984087913739999, "compression/movement_sparsity/model_sparsity": 0.4812869161055182, "compression_loss": 58.990177154541016, "distillation_loss": 2.9774866104125977, "epoch": 2.7, "learning_rate": 4.053724053724054e-05, "loss": 60.6457, "step": 3198, "task_loss": 1.4511774778366089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5509570929585539, "compression/movement_sparsity/importance_threshold": -0.00319408627892832, "compression/movement_sparsity/linear_layer_sparsity": 0.49904444490233246, "compression/movement_sparsity/model_sparsity": 0.48190073297965363, "compression_loss": 59.04288101196289, "distillation_loss": 2.0419106483459473, "epoch": 2.7, "learning_rate": 4.053254437869823e-05, "loss": 61.064, "step": 3199, "task_loss": 1.4099332094192505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5514521788092548, "compression/movement_sparsity/importance_threshold": -0.00319056468467125, "compression/movement_sparsity/linear_layer_sparsity": 0.4998529750130498, "compression/movement_sparsity/model_sparsity": 0.48268148759373825, "compression_loss": 59.095558166503906, "distillation_loss": 1.6840778589248657, "epoch": 2.7, "learning_rate": 4.052784822015591e-05, "loss": 60.5214, "step": 3200, "task_loss": 2.4168262481689453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5519469006262585, "compression/movement_sparsity/importance_threshold": -0.003187045679821604, "compression/movement_sparsity/linear_layer_sparsity": 0.5004548235261347, "compression/movement_sparsity/model_sparsity": 0.4832626607588961, "compression_loss": 59.148197174072266, "distillation_loss": 1.2769057750701904, "epoch": 2.71, "learning_rate": 4.05231520616136e-05, "loss": 60.5171, "step": 3201, "task_loss": 1.5145009756088257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5524412585434499, "compression/movement_sparsity/importance_threshold": -0.003183529263427045, "compression/movement_sparsity/linear_layer_sparsity": 0.5010053027250443, "compression/movement_sparsity/model_sparsity": 0.4837942293038511, "compression_loss": 59.2008171081543, "distillation_loss": 1.4009939432144165, "epoch": 2.71, "learning_rate": 4.051845590307129e-05, "loss": 61.1621, "step": 3202, "task_loss": 2.2459006309509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5529352526947144, "compression/movement_sparsity/importance_threshold": -0.0031800154345352346, "compression/movement_sparsity/linear_layer_sparsity": 0.5017200969539761, "compression/movement_sparsity/model_sparsity": 0.4844844681520548, "compression_loss": 59.25337219238281, "distillation_loss": 1.0997272729873657, "epoch": 2.71, "learning_rate": 4.051375974452898e-05, "loss": 60.7626, "step": 3203, "task_loss": 0.7806934118270874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5534288832139369, "compression/movement_sparsity/importance_threshold": -0.0031765041921938356, "compression/movement_sparsity/linear_layer_sparsity": 0.5023570144440781, "compression/movement_sparsity/model_sparsity": 0.48509950556698445, "compression_loss": 59.305908203125, "distillation_loss": 1.7321157455444336, "epoch": 2.71, "learning_rate": 4.050906358598667e-05, "loss": 61.0015, "step": 3204, "task_loss": 1.1556769609451294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5539221502350027, "compression/movement_sparsity/importance_threshold": -0.003172995535450509, "compression/movement_sparsity/linear_layer_sparsity": 0.5028610728583813, "compression/movement_sparsity/model_sparsity": 0.4855862480240913, "compression_loss": 59.358394622802734, "distillation_loss": 1.476398229598999, "epoch": 2.71, "learning_rate": 4.050436742744435e-05, "loss": 60.5933, "step": 3205, "task_loss": 0.8767927289009094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5544150538917967, "compression/movement_sparsity/importance_threshold": -0.003169489463352918, "compression/movement_sparsity/linear_layer_sparsity": 0.5034263022526565, "compression/movement_sparsity/model_sparsity": 0.48613206004982407, "compression_loss": 59.41087341308594, "distillation_loss": 1.5197722911834717, "epoch": 2.71, "learning_rate": 4.049967126890204e-05, "loss": 60.7528, "step": 3206, "task_loss": 1.0603898763656616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5549075943182042, "compression/movement_sparsity/importance_threshold": -0.003165985974948722, "compression/movement_sparsity/linear_layer_sparsity": 0.5040070330648583, "compression/movement_sparsity/model_sparsity": 0.4866928409720898, "compression_loss": 59.463287353515625, "distillation_loss": 2.91446590423584, "epoch": 2.71, "learning_rate": 4.049497511035973e-05, "loss": 61.3662, "step": 3207, "task_loss": 1.8381617069244385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5553997716481105, "compression/movement_sparsity/importance_threshold": -0.0031624850692855847, "compression/movement_sparsity/linear_layer_sparsity": 0.5046118745440198, "compression/movement_sparsity/model_sparsity": 0.4872769042857321, "compression_loss": 59.515647888183594, "distillation_loss": 1.1554421186447144, "epoch": 2.71, "learning_rate": 4.0490278951817415e-05, "loss": 61.2926, "step": 3208, "task_loss": 1.8352118730545044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5558915860154006, "compression/movement_sparsity/importance_threshold": -0.0031589867454111667, "compression/movement_sparsity/linear_layer_sparsity": 0.5052633752911405, "compression/movement_sparsity/model_sparsity": 0.4879060239779384, "compression_loss": 59.56797790527344, "distillation_loss": 2.0700783729553223, "epoch": 2.71, "learning_rate": 4.04855827932751e-05, "loss": 61.6754, "step": 3209, "task_loss": 3.1666927337646484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5563830375539592, "compression/movement_sparsity/importance_threshold": -0.0031554910023731337, "compression/movement_sparsity/linear_layer_sparsity": 0.5058790677628506, "compression/movement_sparsity/model_sparsity": 0.4885005655191537, "compression_loss": 59.62025451660156, "distillation_loss": 1.6682908535003662, "epoch": 2.71, "learning_rate": 4.048088663473279e-05, "loss": 60.9609, "step": 3210, "task_loss": 1.4685249328613281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5568741263976722, "compression/movement_sparsity/importance_threshold": -0.0031519978392191422, "compression/movement_sparsity/linear_layer_sparsity": 0.5065960679627951, "compression/movement_sparsity/model_sparsity": 0.4891929345564795, "compression_loss": 59.672489166259766, "distillation_loss": 1.6802546977996826, "epoch": 2.71, "learning_rate": 4.047619047619048e-05, "loss": 61.6184, "step": 3211, "task_loss": 1.1450345516204834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5573648526804245, "compression/movement_sparsity/importance_threshold": -0.003148507254996856, "compression/movement_sparsity/linear_layer_sparsity": 0.5072467459423489, "compression/movement_sparsity/model_sparsity": 0.489821259745716, "compression_loss": 59.724666595458984, "distillation_loss": 1.7522343397140503, "epoch": 2.71, "learning_rate": 4.047149431764817e-05, "loss": 61.3943, "step": 3212, "task_loss": 1.7336397171020508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.557855216536101, "compression/movement_sparsity/importance_threshold": -0.0031450192487539374, "compression/movement_sparsity/linear_layer_sparsity": 0.5078989740636953, "compression/movement_sparsity/model_sparsity": 0.4904510818246058, "compression_loss": 59.77683639526367, "distillation_loss": 2.6297526359558105, "epoch": 2.72, "learning_rate": 4.0466798159105854e-05, "loss": 61.273, "step": 3213, "task_loss": 1.7166067361831665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5583452180985871, "compression/movement_sparsity/importance_threshold": -0.003141533819538048, "compression/movement_sparsity/linear_layer_sparsity": 0.5084118086653785, "compression/movement_sparsity/model_sparsity": 0.49094629898005737, "compression_loss": 59.82892990112305, "distillation_loss": 1.5280091762542725, "epoch": 2.72, "learning_rate": 4.046210200056354e-05, "loss": 61.809, "step": 3214, "task_loss": 0.7874156832695007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5588348575017676, "compression/movement_sparsity/importance_threshold": -0.0031380509663968504, "compression/movement_sparsity/linear_layer_sparsity": 0.5091664892350523, "compression/movement_sparsity/model_sparsity": 0.4916750539504939, "compression_loss": 59.881019592285156, "distillation_loss": 2.173506736755371, "epoch": 2.72, "learning_rate": 4.0457405842021226e-05, "loss": 61.313, "step": 3215, "task_loss": 1.3392128944396973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5593241348795281, "compression/movement_sparsity/importance_threshold": -0.0031345706883780045, "compression/movement_sparsity/linear_layer_sparsity": 0.509806065814537, "compression/movement_sparsity/model_sparsity": 0.4922926591069057, "compression_loss": 59.93303680419922, "distillation_loss": 1.315466284751892, "epoch": 2.72, "learning_rate": 4.045270968347892e-05, "loss": 61.6233, "step": 3216, "task_loss": 0.613333523273468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5598130503657535, "compression/movement_sparsity/importance_threshold": -0.0031310929845291746, "compression/movement_sparsity/linear_layer_sparsity": 0.5103875001526293, "compression/movement_sparsity/model_sparsity": 0.49285411938678325, "compression_loss": 59.98503494262695, "distillation_loss": 1.5251576900482178, "epoch": 2.72, "learning_rate": 4.0448013524936606e-05, "loss": 61.484, "step": 3217, "task_loss": 1.0608320236206055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5603016040943287, "compression/movement_sparsity/importance_threshold": -0.0031276178538980227, "compression/movement_sparsity/linear_layer_sparsity": 0.5110532264317396, "compression/movement_sparsity/model_sparsity": 0.4934969759201925, "compression_loss": 60.03696823120117, "distillation_loss": 2.0073866844177246, "epoch": 2.72, "learning_rate": 4.044331736639429e-05, "loss": 61.5906, "step": 3218, "task_loss": 0.9942153096199036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5607897961991393, "compression/movement_sparsity/importance_threshold": -0.003124145295532208, "compression/movement_sparsity/linear_layer_sparsity": 0.5117834386094245, "compression/movement_sparsity/model_sparsity": 0.4942021030631786, "compression_loss": 60.08891677856445, "distillation_loss": 2.0025384426116943, "epoch": 2.72, "learning_rate": 4.043862120785198e-05, "loss": 61.6576, "step": 3219, "task_loss": 1.3460665941238403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.56127762681407, "compression/movement_sparsity/importance_threshold": -0.0031206753084793946, "compression/movement_sparsity/linear_layer_sparsity": 0.5125281863474577, "compression/movement_sparsity/model_sparsity": 0.4949212664252982, "compression_loss": 60.14077377319336, "distillation_loss": 1.553736686706543, "epoch": 2.72, "learning_rate": 4.0433925049309665e-05, "loss": 61.6609, "step": 3220, "task_loss": 1.0792765617370605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5617650960730064, "compression/movement_sparsity/importance_threshold": -0.0031172078917872425, "compression/movement_sparsity/linear_layer_sparsity": 0.5132096763761825, "compression/movement_sparsity/model_sparsity": 0.4955793451750278, "compression_loss": 60.1926155090332, "distillation_loss": 1.360224962234497, "epoch": 2.72, "learning_rate": 4.042922889076736e-05, "loss": 61.8377, "step": 3221, "task_loss": 0.558312177658081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5622522041098333, "compression/movement_sparsity/importance_threshold": -0.0031137430445034144, "compression/movement_sparsity/linear_layer_sparsity": 0.5138803869573645, "compression/movement_sparsity/model_sparsity": 0.4962270147843991, "compression_loss": 60.24443435668945, "distillation_loss": 1.542664885520935, "epoch": 2.72, "learning_rate": 4.042453273222504e-05, "loss": 61.9112, "step": 3222, "task_loss": 1.6772557497024536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5627389510584361, "compression/movement_sparsity/importance_threshold": -0.0031102807656755724, "compression/movement_sparsity/linear_layer_sparsity": 0.514610551438379, "compression/movement_sparsity/model_sparsity": 0.49693209586924203, "compression_loss": 60.29618453979492, "distillation_loss": 2.0261876583099365, "epoch": 2.72, "learning_rate": 4.041983657368273e-05, "loss": 62.4113, "step": 3223, "task_loss": 1.7311145067214966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5632253370526996, "compression/movement_sparsity/importance_threshold": -0.0031068210543513794, "compression/movement_sparsity/linear_layer_sparsity": 0.5151375877237164, "compression/movement_sparsity/model_sparsity": 0.49744102683682495, "compression_loss": 60.347938537597656, "distillation_loss": 2.092508316040039, "epoch": 2.72, "learning_rate": 4.041514041514042e-05, "loss": 62.194, "step": 3224, "task_loss": 1.2891664505004883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5637113622265093, "compression/movement_sparsity/importance_threshold": -0.0031033639095784946, "compression/movement_sparsity/linear_layer_sparsity": 0.5157121895137534, "compression/movement_sparsity/model_sparsity": 0.4979958892876923, "compression_loss": 60.39963150024414, "distillation_loss": 2.690955400466919, "epoch": 2.73, "learning_rate": 4.041044425659811e-05, "loss": 62.5937, "step": 3225, "task_loss": 1.658487319946289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5641970267137502, "compression/movement_sparsity/importance_threshold": -0.003099909330404581, "compression/movement_sparsity/linear_layer_sparsity": 0.5162377352781364, "compression/movement_sparsity/model_sparsity": 0.4985033809383009, "compression_loss": 60.451236724853516, "distillation_loss": 1.602464199066162, "epoch": 2.73, "learning_rate": 4.040574809805579e-05, "loss": 62.3793, "step": 3226, "task_loss": 0.809172511100769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5646823306483073, "compression/movement_sparsity/importance_threshold": -0.0030964573158773013, "compression/movement_sparsity/linear_layer_sparsity": 0.5167909570356023, "compression/movement_sparsity/model_sparsity": 0.4990375978264886, "compression_loss": 60.50285339355469, "distillation_loss": 1.211909532546997, "epoch": 2.73, "learning_rate": 4.0401051939513476e-05, "loss": 62.2312, "step": 3227, "task_loss": 0.7026483416557312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5651672741640659, "compression/movement_sparsity/importance_threshold": -0.0030930078650443175, "compression/movement_sparsity/linear_layer_sparsity": 0.5174723993676567, "compression/movement_sparsity/model_sparsity": 0.49969563051807503, "compression_loss": 60.55445861816406, "distillation_loss": 2.8750758171081543, "epoch": 2.73, "learning_rate": 4.039635578097117e-05, "loss": 62.6207, "step": 3228, "task_loss": 1.952465295791626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.565651857394911, "compression/movement_sparsity/importance_threshold": -0.0030895609769532907, "compression/movement_sparsity/linear_layer_sparsity": 0.5179627449891786, "compression/movement_sparsity/model_sparsity": 0.5001691312590181, "compression_loss": 60.60600280761719, "distillation_loss": 1.5798988342285156, "epoch": 2.73, "learning_rate": 4.0391659622428855e-05, "loss": 62.1775, "step": 3229, "task_loss": 0.8366016745567322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5661360804747277, "compression/movement_sparsity/importance_threshold": -0.003086116650651884, "compression/movement_sparsity/linear_layer_sparsity": 0.518664160302023, "compression/movement_sparsity/model_sparsity": 0.5008464507980603, "compression_loss": 60.65749740600586, "distillation_loss": 3.2636427879333496, "epoch": 2.73, "learning_rate": 4.038696346388654e-05, "loss": 63.044, "step": 3230, "task_loss": 2.0450456142425537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5666199435374015, "compression/movement_sparsity/importance_threshold": -0.003082674885187757, "compression/movement_sparsity/linear_layer_sparsity": 0.5192895232736858, "compression/movement_sparsity/model_sparsity": 0.5014503306278051, "compression_loss": 60.70896530151367, "distillation_loss": 0.9212397336959839, "epoch": 2.73, "learning_rate": 4.038226730534423e-05, "loss": 62.0664, "step": 3231, "task_loss": 1.161199688911438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5671034467168172, "compression/movement_sparsity/importance_threshold": -0.003079235679608573, "compression/movement_sparsity/linear_layer_sparsity": 0.5199705244115376, "compression/movement_sparsity/model_sparsity": 0.5021079372815671, "compression_loss": 60.760379791259766, "distillation_loss": 1.933777928352356, "epoch": 2.73, "learning_rate": 4.037757114680192e-05, "loss": 62.769, "step": 3232, "task_loss": 0.993301510810852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5675865901468602, "compression/movement_sparsity/importance_threshold": -0.0030757990329619922, "compression/movement_sparsity/linear_layer_sparsity": 0.5207536679693582, "compression/movement_sparsity/model_sparsity": 0.5028641774489452, "compression_loss": 60.811763763427734, "distillation_loss": 2.2426950931549072, "epoch": 2.73, "learning_rate": 4.037287498825961e-05, "loss": 62.4492, "step": 3233, "task_loss": 1.164536476135254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5680693739614154, "compression/movement_sparsity/importance_threshold": -0.003072364944295679, "compression/movement_sparsity/linear_layer_sparsity": 0.5214937533578456, "compression/movement_sparsity/model_sparsity": 0.5035788386275691, "compression_loss": 60.863094329833984, "distillation_loss": 2.0538957118988037, "epoch": 2.73, "learning_rate": 4.0368178829717294e-05, "loss": 63.0501, "step": 3234, "task_loss": 1.3006826639175415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5685517982943682, "compression/movement_sparsity/importance_threshold": -0.003068933412657293, "compression/movement_sparsity/linear_layer_sparsity": 0.5222219384270325, "compression/movement_sparsity/model_sparsity": 0.5042820082994701, "compression_loss": 60.91440200805664, "distillation_loss": 1.802045226097107, "epoch": 2.73, "learning_rate": 4.036348267117498e-05, "loss": 62.5105, "step": 3235, "task_loss": 1.0387505292892456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5690338632796035, "compression/movement_sparsity/importance_threshold": -0.003065504437094497, "compression/movement_sparsity/linear_layer_sparsity": 0.5228950696142446, "compression/movement_sparsity/model_sparsity": 0.5049320153596077, "compression_loss": 60.96563720703125, "distillation_loss": 1.4632132053375244, "epoch": 2.73, "learning_rate": 4.035878651263267e-05, "loss": 62.9784, "step": 3236, "task_loss": 0.8749130368232727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5695155690510064, "compression/movement_sparsity/importance_threshold": -0.0030620780166549545, "compression/movement_sparsity/linear_layer_sparsity": 0.5234951056538488, "compression/movement_sparsity/model_sparsity": 0.5055114383153249, "compression_loss": 61.01691436767578, "distillation_loss": 3.011005163192749, "epoch": 2.74, "learning_rate": 4.035409035409036e-05, "loss": 63.1214, "step": 3237, "task_loss": 1.7062886953353882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5699969157424623, "compression/movement_sparsity/importance_threshold": -0.003058654150386324, "compression/movement_sparsity/linear_layer_sparsity": 0.5241243201316581, "compression/movement_sparsity/model_sparsity": 0.5061190373401312, "compression_loss": 61.06809616088867, "distillation_loss": 1.5956169366836548, "epoch": 2.74, "learning_rate": 4.0349394195548046e-05, "loss": 62.7534, "step": 3238, "task_loss": 0.947770893573761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5704779034878561, "compression/movement_sparsity/importance_threshold": -0.0030552328373362704, "compression/movement_sparsity/linear_layer_sparsity": 0.5247987033564719, "compression/movement_sparsity/model_sparsity": 0.5067702534265273, "compression_loss": 61.119258880615234, "distillation_loss": 2.0458531379699707, "epoch": 2.74, "learning_rate": 4.034469803700573e-05, "loss": 62.9699, "step": 3239, "task_loss": 0.7789781093597412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5709585324210729, "compression/movement_sparsity/importance_threshold": -0.0030518140765524553, "compression/movement_sparsity/linear_layer_sparsity": 0.5254551764814968, "compression/movement_sparsity/model_sparsity": 0.5074041746801599, "compression_loss": 61.17038345336914, "distillation_loss": 1.0836228132247925, "epoch": 2.74, "learning_rate": 4.034000187846342e-05, "loss": 62.9745, "step": 3240, "task_loss": 1.561343789100647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5714388026759982, "compression/movement_sparsity/importance_threshold": -0.0030483978670825373, "compression/movement_sparsity/linear_layer_sparsity": 0.526128891952923, "compression/movement_sparsity/model_sparsity": 0.5080547459525514, "compression_loss": 61.22146224975586, "distillation_loss": 1.5275442600250244, "epoch": 2.74, "learning_rate": 4.0335305719921105e-05, "loss": 62.6536, "step": 3241, "task_loss": 0.8144348859786987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5719187143865168, "compression/movement_sparsity/importance_threshold": -0.003044984207974182, "compression/movement_sparsity/linear_layer_sparsity": 0.5267658332913603, "compression/movement_sparsity/model_sparsity": 0.5086698063965526, "compression_loss": 61.27248764038086, "distillation_loss": 1.7867106199264526, "epoch": 2.74, "learning_rate": 4.03306095613788e-05, "loss": 62.6149, "step": 3242, "task_loss": 1.1457874774932861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.572398267686514, "compression/movement_sparsity/importance_threshold": -0.0030415730982750492, "compression/movement_sparsity/linear_layer_sparsity": 0.5274836801072068, "compression/movement_sparsity/model_sparsity": 0.5093629929659198, "compression_loss": 61.32349395751953, "distillation_loss": 1.1557576656341553, "epoch": 2.74, "learning_rate": 4.032591340283648e-05, "loss": 63.182, "step": 3243, "task_loss": 1.9832526445388794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.572877462709875, "compression/movement_sparsity/importance_threshold": -0.0030381645370328005, "compression/movement_sparsity/linear_layer_sparsity": 0.5281882672486423, "compression/movement_sparsity/model_sparsity": 0.5100433753714834, "compression_loss": 61.37441635131836, "distillation_loss": 1.8031165599822998, "epoch": 2.74, "learning_rate": 4.032121724429417e-05, "loss": 63.3562, "step": 3244, "task_loss": 2.008988618850708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5733562995904848, "compression/movement_sparsity/importance_threshold": -0.0030347585232950985, "compression/movement_sparsity/linear_layer_sparsity": 0.5288650710794863, "compression/movement_sparsity/model_sparsity": 0.5106969289086457, "compression_loss": 61.42534255981445, "distillation_loss": 2.933014154434204, "epoch": 2.74, "learning_rate": 4.031652108575186e-05, "loss": 63.7195, "step": 3245, "task_loss": 2.3858935832977295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5738347784622286, "compression/movement_sparsity/importance_threshold": -0.003031355056109606, "compression/movement_sparsity/linear_layer_sparsity": 0.5296176887681591, "compression/movement_sparsity/model_sparsity": 0.5114236918643897, "compression_loss": 61.47623062133789, "distillation_loss": 1.2888081073760986, "epoch": 2.74, "learning_rate": 4.0311824927209544e-05, "loss": 62.9345, "step": 3246, "task_loss": 0.6766534447669983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5743128994589916, "compression/movement_sparsity/importance_threshold": -0.0030279541345239833, "compression/movement_sparsity/linear_layer_sparsity": 0.530280577095372, "compression/movement_sparsity/model_sparsity": 0.5120638079382799, "compression_loss": 61.52706527709961, "distillation_loss": 2.142594575881958, "epoch": 2.74, "learning_rate": 4.030712876866723e-05, "loss": 63.3229, "step": 3247, "task_loss": 1.6662958860397339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5747906627146587, "compression/movement_sparsity/importance_threshold": -0.0030245557575858924, "compression/movement_sparsity/linear_layer_sparsity": 0.5310455243733803, "compression/movement_sparsity/model_sparsity": 0.5128024769240355, "compression_loss": 61.57783889770508, "distillation_loss": 2.231569528579712, "epoch": 2.75, "learning_rate": 4.0302432610124916e-05, "loss": 63.5439, "step": 3248, "task_loss": 1.9656161069869995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5752680683631153, "compression/movement_sparsity/importance_threshold": -0.003021159924342997, "compression/movement_sparsity/linear_layer_sparsity": 0.5316758716471148, "compression/movement_sparsity/model_sparsity": 0.5134111698297422, "compression_loss": 61.628604888916016, "distillation_loss": 2.2666854858398438, "epoch": 2.75, "learning_rate": 4.029773645158261e-05, "loss": 63.7608, "step": 3249, "task_loss": 1.55625319480896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5757451165382462, "compression/movement_sparsity/importance_threshold": -0.003017766633842958, "compression/movement_sparsity/linear_layer_sparsity": 0.5322238706191562, "compression/movement_sparsity/model_sparsity": 0.5139403433512519, "compression_loss": 61.679325103759766, "distillation_loss": 2.0386757850646973, "epoch": 2.75, "learning_rate": 4.0293040293040296e-05, "loss": 63.4694, "step": 3250, "task_loss": 1.2758312225341797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.576221807373937, "compression/movement_sparsity/importance_threshold": -0.003014375885133436, "compression/movement_sparsity/linear_layer_sparsity": 0.5327363951924808, "compression/movement_sparsity/model_sparsity": 0.5144352611287729, "compression_loss": 61.72999572753906, "distillation_loss": 1.6926629543304443, "epoch": 2.75, "learning_rate": 4.028834413449798e-05, "loss": 63.463, "step": 3251, "task_loss": 2.106184720993042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5766981410040727, "compression/movement_sparsity/importance_threshold": -0.0030109876772620933, "compression/movement_sparsity/linear_layer_sparsity": 0.5333379813738778, "compression/movement_sparsity/model_sparsity": 0.5150161809741434, "compression_loss": 61.78059005737305, "distillation_loss": 2.308220148086548, "epoch": 2.75, "learning_rate": 4.028364797595567e-05, "loss": 63.9441, "step": 3252, "task_loss": 3.4334943294525146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5771741175625382, "compression/movement_sparsity/importance_threshold": -0.0030076020092765924, "compression/movement_sparsity/linear_layer_sparsity": 0.5339745411389507, "compression/movement_sparsity/model_sparsity": 0.515630872952999, "compression_loss": 61.83122253417969, "distillation_loss": 2.2797534465789795, "epoch": 2.75, "learning_rate": 4.0278951817413355e-05, "loss": 63.6813, "step": 3253, "task_loss": 1.0787466764450073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.577649737183219, "compression/movement_sparsity/importance_threshold": -0.003004218880224595, "compression/movement_sparsity/linear_layer_sparsity": 0.5346851022883717, "compression/movement_sparsity/model_sparsity": 0.5163170241409958, "compression_loss": 61.88179016113281, "distillation_loss": 3.2299184799194336, "epoch": 2.75, "learning_rate": 4.027425565887105e-05, "loss": 64.2504, "step": 3254, "task_loss": 2.8829147815704346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.578125, "compression/movement_sparsity/importance_threshold": -0.003000838289153762, "compression/movement_sparsity/linear_layer_sparsity": 0.5353294485349109, "compression/movement_sparsity/model_sparsity": 0.5169392351117253, "compression_loss": 61.93232345581055, "distillation_loss": 2.0098862648010254, "epoch": 2.75, "learning_rate": 4.0269559500328734e-05, "loss": 63.5558, "step": 3255, "task_loss": 0.7921760678291321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5785999061467664, "compression/movement_sparsity/importance_threshold": -0.002997460235111757, "compression/movement_sparsity/linear_layer_sparsity": 0.5359966414866404, "compression/movement_sparsity/model_sparsity": 0.5175835079330373, "compression_loss": 61.98283004760742, "distillation_loss": 1.9588875770568848, "epoch": 2.75, "learning_rate": 4.026486334178642e-05, "loss": 63.4862, "step": 3256, "task_loss": 1.8581688404083252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5790744557574032, "compression/movement_sparsity/importance_threshold": -0.002994084717146241, "compression/movement_sparsity/linear_layer_sparsity": 0.5366290277930407, "compression/movement_sparsity/model_sparsity": 0.518194169824365, "compression_loss": 62.033260345458984, "distillation_loss": 2.8196396827697754, "epoch": 2.75, "learning_rate": 4.026016718324411e-05, "loss": 64.0444, "step": 3257, "task_loss": 1.6673789024353027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5795486489657957, "compression/movement_sparsity/importance_threshold": -0.0029907117343048756, "compression/movement_sparsity/linear_layer_sparsity": 0.5372932516270288, "compression/movement_sparsity/model_sparsity": 0.5188355755262641, "compression_loss": 62.08372116088867, "distillation_loss": 1.8913958072662354, "epoch": 2.75, "learning_rate": 4.025547102470179e-05, "loss": 64.1476, "step": 3258, "task_loss": 2.0021309852600098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5800224859058292, "compression/movement_sparsity/importance_threshold": -0.002987341285635323, "compression/movement_sparsity/linear_layer_sparsity": 0.5379319458181085, "compression/movement_sparsity/model_sparsity": 0.5194523286070271, "compression_loss": 62.1341438293457, "distillation_loss": 1.8917821645736694, "epoch": 2.75, "learning_rate": 4.0250774866159486e-05, "loss": 64.2596, "step": 3259, "task_loss": 1.3354722261428833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5804959667113884, "compression/movement_sparsity/importance_threshold": -0.0029839733701852453, "compression/movement_sparsity/linear_layer_sparsity": 0.5385074061482154, "compression/movement_sparsity/model_sparsity": 0.5200080201044717, "compression_loss": 62.184505462646484, "distillation_loss": 4.244128704071045, "epoch": 2.76, "learning_rate": 4.0246078707617166e-05, "loss": 64.7264, "step": 3260, "task_loss": 1.5644769668579102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.580969091516359, "compression/movement_sparsity/importance_threshold": -0.0029806079870023016, "compression/movement_sparsity/linear_layer_sparsity": 0.5391559616259299, "compression/movement_sparsity/model_sparsity": 0.5206342957063367, "compression_loss": 62.234825134277344, "distillation_loss": 2.2206366062164307, "epoch": 2.76, "learning_rate": 4.024138254907486e-05, "loss": 63.946, "step": 3261, "task_loss": 1.5240553617477417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5814418604546254, "compression/movement_sparsity/importance_threshold": -0.002977245135134159, "compression/movement_sparsity/linear_layer_sparsity": 0.5397457905780378, "compression/movement_sparsity/model_sparsity": 0.5212038622194136, "compression_loss": 62.28511047363281, "distillation_loss": 2.4709553718566895, "epoch": 2.76, "learning_rate": 4.0236686390532545e-05, "loss": 64.3977, "step": 3262, "task_loss": 1.868844985961914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5819142736600736, "compression/movement_sparsity/importance_threshold": -0.002973884813628474, "compression/movement_sparsity/linear_layer_sparsity": 0.5404767659024515, "compression/movement_sparsity/model_sparsity": 0.5219097262926906, "compression_loss": 62.3353385925293, "distillation_loss": 1.0168776512145996, "epoch": 2.76, "learning_rate": 4.023199023199024e-05, "loss": 63.8202, "step": 3263, "task_loss": 0.800197184085846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.582386331266588, "compression/movement_sparsity/importance_threshold": -0.002970527021532914, "compression/movement_sparsity/linear_layer_sparsity": 0.5411362081452177, "compression/movement_sparsity/model_sparsity": 0.5225465146657361, "compression_loss": 62.385555267333984, "distillation_loss": 2.0899786949157715, "epoch": 2.76, "learning_rate": 4.0227294073447925e-05, "loss": 64.4466, "step": 3264, "task_loss": 1.675853967666626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5828580334080542, "compression/movement_sparsity/importance_threshold": -0.0029671717578951358, "compression/movement_sparsity/linear_layer_sparsity": 0.5418024829360392, "compression/movement_sparsity/model_sparsity": 0.5231899008677918, "compression_loss": 62.43568420410156, "distillation_loss": 2.876338481903076, "epoch": 2.76, "learning_rate": 4.0222597914905604e-05, "loss": 64.7883, "step": 3265, "task_loss": 2.3792572021484375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5833293802183572, "compression/movement_sparsity/importance_threshold": -0.002963819021762803, "compression/movement_sparsity/linear_layer_sparsity": 0.5422844935643837, "compression/movement_sparsity/model_sparsity": 0.5236553529482145, "compression_loss": 62.485816955566406, "distillation_loss": 2.022003650665283, "epoch": 2.76, "learning_rate": 4.02179017563633e-05, "loss": 64.4935, "step": 3266, "task_loss": 1.3072806596755981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.583800371831382, "compression/movement_sparsity/importance_threshold": -0.00296046881218358, "compression/movement_sparsity/linear_layer_sparsity": 0.5428635669172841, "compression/movement_sparsity/model_sparsity": 0.5242145333500049, "compression_loss": 62.5358772277832, "distillation_loss": 1.9025096893310547, "epoch": 2.76, "learning_rate": 4.0213205597820984e-05, "loss": 64.2779, "step": 3267, "task_loss": 0.6017476916313171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5842710083810142, "compression/movement_sparsity/importance_threshold": -0.002957121128205123, "compression/movement_sparsity/linear_layer_sparsity": 0.5434188754040862, "compression/movement_sparsity/model_sparsity": 0.5247507652819566, "compression_loss": 62.58592987060547, "distillation_loss": 2.0655131340026855, "epoch": 2.76, "learning_rate": 4.020850943927868e-05, "loss": 64.5279, "step": 3268, "task_loss": 1.3567867279052734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5847412900011383, "compression/movement_sparsity/importance_threshold": -0.0029537759688750996, "compression/movement_sparsity/linear_layer_sparsity": 0.544019448031234, "compression/movement_sparsity/model_sparsity": 0.5253307063917845, "compression_loss": 62.635921478271484, "distillation_loss": 2.732645034790039, "epoch": 2.76, "learning_rate": 4.0203813280736356e-05, "loss": 64.8408, "step": 3269, "task_loss": 2.510875940322876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.58521121682564, "compression/movement_sparsity/importance_threshold": -0.002950433333241167, "compression/movement_sparsity/linear_layer_sparsity": 0.5446425454110461, "compression/movement_sparsity/model_sparsity": 0.5259323984597282, "compression_loss": 62.68589401245117, "distillation_loss": 2.447547674179077, "epoch": 2.76, "learning_rate": 4.019911712219405e-05, "loss": 64.4699, "step": 3270, "task_loss": 2.282616138458252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5856807889884039, "compression/movement_sparsity/importance_threshold": -0.0029470932203509914, "compression/movement_sparsity/linear_layer_sparsity": 0.5453151280865469, "compression/movement_sparsity/model_sparsity": 0.5265818758512193, "compression_loss": 62.735774993896484, "distillation_loss": 1.4536839723587036, "epoch": 2.76, "learning_rate": 4.0194420963651736e-05, "loss": 65.1926, "step": 3271, "task_loss": 1.329524040222168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5861500066233158, "compression/movement_sparsity/importance_threshold": -0.0029437556292522303, "compression/movement_sparsity/linear_layer_sparsity": 0.5459185744380951, "compression/movement_sparsity/model_sparsity": 0.5271645919641735, "compression_loss": 62.7856559753418, "distillation_loss": 2.601529598236084, "epoch": 2.77, "learning_rate": 4.018972480510942e-05, "loss": 65.1251, "step": 3272, "task_loss": 2.0392565727233887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5866188698642603, "compression/movement_sparsity/importance_threshold": -0.00294042055899255, "compression/movement_sparsity/linear_layer_sparsity": 0.5467474710271344, "compression/movement_sparsity/model_sparsity": 0.5279650134053953, "compression_loss": 62.83549499511719, "distillation_loss": 2.546035051345825, "epoch": 2.77, "learning_rate": 4.018502864656711e-05, "loss": 64.9252, "step": 3273, "task_loss": 2.109670639038086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5870873788451227, "compression/movement_sparsity/importance_threshold": -0.0029370880086196085, "compression/movement_sparsity/linear_layer_sparsity": 0.547409095392578, "compression/movement_sparsity/model_sparsity": 0.5286039089384912, "compression_loss": 62.88528823852539, "distillation_loss": 2.6540310382843018, "epoch": 2.77, "learning_rate": 4.0180332488024795e-05, "loss": 65.0801, "step": 3274, "task_loss": 1.0296117067337036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5875555336997882, "compression/movement_sparsity/importance_threshold": -0.002933757977181071, "compression/movement_sparsity/linear_layer_sparsity": 0.5479113055608976, "compression/movement_sparsity/model_sparsity": 0.5290888666425498, "compression_loss": 62.935054779052734, "distillation_loss": 1.5500949621200562, "epoch": 2.77, "learning_rate": 4.017563632948249e-05, "loss": 64.4542, "step": 3275, "task_loss": 1.0028144121170044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.588023334562142, "compression/movement_sparsity/importance_threshold": -0.0029304304637245955, "compression/movement_sparsity/linear_layer_sparsity": 0.5486471340215391, "compression/movement_sparsity/model_sparsity": 0.5297994171318952, "compression_loss": 62.9847412109375, "distillation_loss": 1.9208810329437256, "epoch": 2.77, "learning_rate": 4.0170940170940174e-05, "loss": 64.6861, "step": 3276, "task_loss": 1.8509160280227661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5884907815660689, "compression/movement_sparsity/importance_threshold": -0.002927105467297848, "compression/movement_sparsity/linear_layer_sparsity": 0.5494629617228495, "compression/movement_sparsity/model_sparsity": 0.5305872186418861, "compression_loss": 63.034385681152344, "distillation_loss": 1.3847700357437134, "epoch": 2.77, "learning_rate": 4.016624401239786e-05, "loss": 64.8826, "step": 3277, "task_loss": 0.3949688971042633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5889578748454543, "compression/movement_sparsity/importance_threshold": -0.002923782986948489, "compression/movement_sparsity/linear_layer_sparsity": 0.5500938336599601, "compression/movement_sparsity/model_sparsity": 0.5311964181871678, "compression_loss": 63.0839958190918, "distillation_loss": 1.6337634325027466, "epoch": 2.77, "learning_rate": 4.016154785385555e-05, "loss": 64.8297, "step": 3278, "task_loss": 1.2714850902557373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5894246145341837, "compression/movement_sparsity/importance_threshold": -0.0029204630217241753, "compression/movement_sparsity/linear_layer_sparsity": 0.5507778039155532, "compression/movement_sparsity/model_sparsity": 0.5318568919603427, "compression_loss": 63.133544921875, "distillation_loss": 2.991039276123047, "epoch": 2.77, "learning_rate": 4.0156851695313233e-05, "loss": 65.464, "step": 3279, "task_loss": 1.8225722312927246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5898910007661413, "compression/movement_sparsity/importance_threshold": -0.002917145570672578, "compression/movement_sparsity/linear_layer_sparsity": 0.5513466701809574, "compression/movement_sparsity/model_sparsity": 0.5324062159194929, "compression_loss": 63.183074951171875, "distillation_loss": 1.8554184436798096, "epoch": 2.77, "learning_rate": 4.0152155536770927e-05, "loss": 64.8261, "step": 3280, "task_loss": 0.6839707493782043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5903570336752134, "compression/movement_sparsity/importance_threshold": -0.0029138306328413506, "compression/movement_sparsity/linear_layer_sparsity": 0.5520925984115864, "compression/movement_sparsity/model_sparsity": 0.5331265192206561, "compression_loss": 63.232547760009766, "distillation_loss": 2.146533966064453, "epoch": 2.77, "learning_rate": 4.014745937822861e-05, "loss": 65.5078, "step": 3281, "task_loss": 1.2412214279174805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5908227133952841, "compression/movement_sparsity/importance_threshold": -0.0029105182072781607, "compression/movement_sparsity/linear_layer_sparsity": 0.5527012437002238, "compression/movement_sparsity/model_sparsity": 0.5337142556712169, "compression_loss": 63.282012939453125, "distillation_loss": 2.486207962036133, "epoch": 2.77, "learning_rate": 4.01427632196863e-05, "loss": 65.4539, "step": 3282, "task_loss": 2.1997179985046387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5912880400602394, "compression/movement_sparsity/importance_threshold": -0.0029072082930306662, "compression/movement_sparsity/linear_layer_sparsity": 0.5532881631554286, "compression/movement_sparsity/model_sparsity": 0.53428101263756, "compression_loss": 63.331443786621094, "distillation_loss": 1.600028395652771, "epoch": 2.77, "learning_rate": 4.0138067061143986e-05, "loss": 65.2979, "step": 3283, "task_loss": 1.978952169418335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5917530138039635, "compression/movement_sparsity/importance_threshold": -0.0029039008891465334, "compression/movement_sparsity/linear_layer_sparsity": 0.5539746136378899, "compression/movement_sparsity/model_sparsity": 0.5349438814341801, "compression_loss": 63.38081359863281, "distillation_loss": 2.099459648132324, "epoch": 2.78, "learning_rate": 4.013337090260167e-05, "loss": 65.4328, "step": 3284, "task_loss": 0.9957975149154663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5922176347603425, "compression/movement_sparsity/importance_threshold": -0.002900595994673418, "compression/movement_sparsity/linear_layer_sparsity": 0.5545647287700212, "compression/movement_sparsity/model_sparsity": 0.5355137242961161, "compression_loss": 63.43017578125, "distillation_loss": 1.3462666273117065, "epoch": 2.78, "learning_rate": 4.0128674744059365e-05, "loss": 65.0134, "step": 3285, "task_loss": 0.8176709413528442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5926819030632609, "compression/movement_sparsity/importance_threshold": -0.0028972936086589882, "compression/movement_sparsity/linear_layer_sparsity": 0.5551074811083028, "compression/movement_sparsity/model_sparsity": 0.5360378314218762, "compression_loss": 63.47947692871094, "distillation_loss": 1.9816491603851318, "epoch": 2.78, "learning_rate": 4.0123978585517045e-05, "loss": 65.5555, "step": 3286, "task_loss": 1.4764769077301025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5931458188466039, "compression/movement_sparsity/importance_threshold": -0.0028939937301509025, "compression/movement_sparsity/linear_layer_sparsity": 0.5556877945746372, "compression/movement_sparsity/model_sparsity": 0.5365982093353892, "compression_loss": 63.528778076171875, "distillation_loss": 1.7287217378616333, "epoch": 2.78, "learning_rate": 4.011928242697474e-05, "loss": 65.3457, "step": 3287, "task_loss": 1.948217511177063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5936093822442572, "compression/movement_sparsity/importance_threshold": -0.002890696358196821, "compression/movement_sparsity/linear_layer_sparsity": 0.5561695786437967, "compression/movement_sparsity/model_sparsity": 0.5370634426396318, "compression_loss": 63.578025817871094, "distillation_loss": 1.7825443744659424, "epoch": 2.78, "learning_rate": 4.0114586268432424e-05, "loss": 65.7166, "step": 3288, "task_loss": 0.9606853723526001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5940725933901052, "compression/movement_sparsity/importance_threshold": -0.0028874014918444093, "compression/movement_sparsity/linear_layer_sparsity": 0.5568753224294929, "compression/movement_sparsity/model_sparsity": 0.5377449419551675, "compression_loss": 63.62722396850586, "distillation_loss": 2.2777256965637207, "epoch": 2.78, "learning_rate": 4.010989010989011e-05, "loss": 65.7061, "step": 3289, "task_loss": 0.4186234474182129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5945354524180337, "compression/movement_sparsity/importance_threshold": -0.002884109130141326, "compression/movement_sparsity/linear_layer_sparsity": 0.5576349873012384, "compression/movement_sparsity/model_sparsity": 0.5384785100015661, "compression_loss": 63.67640686035156, "distillation_loss": 2.72342586517334, "epoch": 2.78, "learning_rate": 4.01051939513478e-05, "loss": 65.8326, "step": 3290, "task_loss": 1.9818166494369507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5949979594619271, "compression/movement_sparsity/importance_threshold": -0.0028808192721352377, "compression/movement_sparsity/linear_layer_sparsity": 0.5582714516729702, "compression/movement_sparsity/model_sparsity": 0.5390931098641355, "compression_loss": 63.725528717041016, "distillation_loss": 2.2604751586914062, "epoch": 2.78, "learning_rate": 4.010049779280548e-05, "loss": 65.8931, "step": 3291, "task_loss": 1.3371593952178955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5954601146556714, "compression/movement_sparsity/importance_threshold": -0.002877531916873799, "compression/movement_sparsity/linear_layer_sparsity": 0.5589583910463047, "compression/movement_sparsity/model_sparsity": 0.5397564507567232, "compression_loss": 63.77465057373047, "distillation_loss": 2.9081997871398926, "epoch": 2.78, "learning_rate": 4.0095801634263176e-05, "loss": 65.858, "step": 3292, "task_loss": 2.119868755340576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.595921918133151, "compression/movement_sparsity/importance_threshold": -0.002874247063404679, "compression/movement_sparsity/linear_layer_sparsity": 0.5596079123815978, "compression/movement_sparsity/model_sparsity": 0.5403836590359876, "compression_loss": 63.82370376586914, "distillation_loss": 2.141378164291382, "epoch": 2.78, "learning_rate": 4.009110547572086e-05, "loss": 65.9313, "step": 3293, "task_loss": 0.8515664935112 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5963833700282513, "compression/movement_sparsity/importance_threshold": -0.0028709647107755367, "compression/movement_sparsity/linear_layer_sparsity": 0.5603437885389098, "compression/movement_sparsity/model_sparsity": 0.5410942555834761, "compression_loss": 63.872772216796875, "distillation_loss": 1.7234652042388916, "epoch": 2.78, "learning_rate": 4.0086409317178556e-05, "loss": 65.9134, "step": 3294, "task_loss": 0.7160211205482483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5968444704748577, "compression/movement_sparsity/importance_threshold": -0.0028676848580340315, "compression/movement_sparsity/linear_layer_sparsity": 0.5609519091641711, "compression/movement_sparsity/model_sparsity": 0.5416814853944619, "compression_loss": 63.921783447265625, "distillation_loss": 2.358597993850708, "epoch": 2.78, "learning_rate": 4.0081713158636235e-05, "loss": 66.1444, "step": 3295, "task_loss": 2.047884702682495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5973052196068549, "compression/movement_sparsity/importance_threshold": -0.002864407504227829, "compression/movement_sparsity/linear_layer_sparsity": 0.5616060451523394, "compression/movement_sparsity/model_sparsity": 0.5423131497990789, "compression_loss": 63.97074508666992, "distillation_loss": 2.483809471130371, "epoch": 2.79, "learning_rate": 4.007701700009392e-05, "loss": 65.9607, "step": 3296, "task_loss": 2.3014729022979736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5977656175581283, "compression/movement_sparsity/importance_threshold": -0.00286113264840459, "compression/movement_sparsity/linear_layer_sparsity": 0.5622142134742713, "compression/movement_sparsity/model_sparsity": 0.5429004256682078, "compression_loss": 64.0196533203125, "distillation_loss": 2.29056978225708, "epoch": 2.79, "learning_rate": 4.0072320841551615e-05, "loss": 66.344, "step": 3297, "task_loss": 2.2018308639526367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.598225664462563, "compression/movement_sparsity/importance_threshold": -0.0028578602896119753, "compression/movement_sparsity/linear_layer_sparsity": 0.5628751939346625, "compression/movement_sparsity/model_sparsity": 0.5435386994163708, "compression_loss": 64.06854248046875, "distillation_loss": 2.8957924842834473, "epoch": 2.79, "learning_rate": 4.00676246830093e-05, "loss": 66.2452, "step": 3298, "task_loss": 2.8159518241882324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5986853604540443, "compression/movement_sparsity/importance_threshold": -0.0028545904268976463, "compression/movement_sparsity/linear_layer_sparsity": 0.5634709134255825, "compression/movement_sparsity/model_sparsity": 0.5441139541101302, "compression_loss": 64.1174087524414, "distillation_loss": 1.3112953901290894, "epoch": 2.79, "learning_rate": 4.006292852446699e-05, "loss": 66.1971, "step": 3299, "task_loss": 0.7206677198410034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.599144705666457, "compression/movement_sparsity/importance_threshold": -0.002851323059309267, "compression/movement_sparsity/linear_layer_sparsity": 0.5640914828818557, "compression/movement_sparsity/model_sparsity": 0.5447132050964856, "compression_loss": 64.16621398925781, "distillation_loss": 1.743255615234375, "epoch": 2.79, "learning_rate": 4.0058232365924674e-05, "loss": 66.1699, "step": 3300, "task_loss": 1.877342700958252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5996037002336866, "compression/movement_sparsity/importance_threshold": -0.0028480581858944965, "compression/movement_sparsity/linear_layer_sparsity": 0.5646837324399938, "compression/movement_sparsity/model_sparsity": 0.5452851090603288, "compression_loss": 64.21501159667969, "distillation_loss": 1.7145228385925293, "epoch": 2.79, "learning_rate": 4.005353620738237e-05, "loss": 66.4153, "step": 3301, "task_loss": 0.9960841536521912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6000623442896178, "compression/movement_sparsity/importance_threshold": -0.0028447958057010005, "compression/movement_sparsity/linear_layer_sparsity": 0.5652609098502401, "compression/movement_sparsity/model_sparsity": 0.5458424586509277, "compression_loss": 64.26375579833984, "distillation_loss": 2.026095390319824, "epoch": 2.79, "learning_rate": 4.004884004884005e-05, "loss": 67.1901, "step": 3302, "task_loss": 1.2029534578323364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6005206379681363, "compression/movement_sparsity/importance_threshold": -0.0028415359177764374, "compression/movement_sparsity/linear_layer_sparsity": 0.5658234443826295, "compression/movement_sparsity/model_sparsity": 0.546385668391571, "compression_loss": 64.31245422363281, "distillation_loss": 2.3918118476867676, "epoch": 2.79, "learning_rate": 4.004414389029774e-05, "loss": 66.9685, "step": 3303, "task_loss": 3.1581692695617676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6009785814031265, "compression/movement_sparsity/importance_threshold": -0.0028382785211684717, "compression/movement_sparsity/linear_layer_sparsity": 0.566374650955765, "compression/movement_sparsity/model_sparsity": 0.5469179393232094, "compression_loss": 64.36109161376953, "distillation_loss": 3.2336316108703613, "epoch": 2.79, "learning_rate": 4.0039447731755426e-05, "loss": 66.7293, "step": 3304, "task_loss": 1.8983850479125977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6014361747284744, "compression/movement_sparsity/importance_threshold": -0.002835023614924761, "compression/movement_sparsity/linear_layer_sparsity": 0.5670069299446566, "compression/movement_sparsity/model_sparsity": 0.5475284975837149, "compression_loss": 64.40972900390625, "distillation_loss": 2.3197243213653564, "epoch": 2.79, "learning_rate": 4.003475157321311e-05, "loss": 66.3054, "step": 3305, "task_loss": 2.02203631401062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6018934180780646, "compression/movement_sparsity/importance_threshold": -0.002831771198092972, "compression/movement_sparsity/linear_layer_sparsity": 0.5676025063455651, "compression/movement_sparsity/model_sparsity": 0.5481036141030449, "compression_loss": 64.45832824707031, "distillation_loss": 1.844066858291626, "epoch": 2.79, "learning_rate": 4.0030055414670805e-05, "loss": 66.1389, "step": 3306, "task_loss": 1.1095407009124756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6023503115857822, "compression/movement_sparsity/importance_threshold": -0.0028285212697207653, "compression/movement_sparsity/linear_layer_sparsity": 0.5682522780883785, "compression/movement_sparsity/model_sparsity": 0.548731064187561, "compression_loss": 64.50686645507812, "distillation_loss": 2.3632962703704834, "epoch": 2.79, "learning_rate": 4.0025359256128485e-05, "loss": 66.8602, "step": 3307, "task_loss": 1.4247474670410156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6028068553855128, "compression/movement_sparsity/importance_threshold": -0.0028252738288557996, "compression/movement_sparsity/linear_layer_sparsity": 0.5688325677063778, "compression/movement_sparsity/model_sparsity": 0.5492914190720023, "compression_loss": 64.55537414550781, "distillation_loss": 1.9392775297164917, "epoch": 2.8, "learning_rate": 4.002066309758618e-05, "loss": 66.5936, "step": 3308, "task_loss": 1.6547439098358154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6032630496111409, "compression/movement_sparsity/importance_threshold": -0.002822028874545742, "compression/movement_sparsity/linear_layer_sparsity": 0.5695273174095137, "compression/movement_sparsity/model_sparsity": 0.5499623019855354, "compression_loss": 64.6038589477539, "distillation_loss": 2.75152850151062, "epoch": 2.8, "learning_rate": 4.0015966939043864e-05, "loss": 66.9177, "step": 3309, "task_loss": 1.7488577365875244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6037188943965524, "compression/movement_sparsity/importance_threshold": -0.0028187864058382486, "compression/movement_sparsity/linear_layer_sparsity": 0.5701069988949635, "compression/movement_sparsity/model_sparsity": 0.5505220696286512, "compression_loss": 64.6523208618164, "distillation_loss": 1.4428640604019165, "epoch": 2.8, "learning_rate": 4.001127078050155e-05, "loss": 66.418, "step": 3310, "task_loss": 2.0751166343688965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6041743898756315, "compression/movement_sparsity/importance_threshold": -0.0028155464217809872, "compression/movement_sparsity/linear_layer_sparsity": 0.5707202946089789, "compression/movement_sparsity/model_sparsity": 0.5511142967481719, "compression_loss": 64.70072937011719, "distillation_loss": 3.232705593109131, "epoch": 2.8, "learning_rate": 4.0006574621959244e-05, "loss": 66.7761, "step": 3311, "task_loss": 2.56551194190979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6046295361822644, "compression/movement_sparsity/importance_threshold": -0.002812308921421613, "compression/movement_sparsity/linear_layer_sparsity": 0.5712729678547335, "compression/movement_sparsity/model_sparsity": 0.5516479839677131, "compression_loss": 64.74906158447266, "distillation_loss": 2.3220319747924805, "epoch": 2.8, "learning_rate": 4.000187846341692e-05, "loss": 66.6685, "step": 3312, "task_loss": 1.1667242050170898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6050843334503353, "compression/movement_sparsity/importance_threshold": -0.0028090739038077947, "compression/movement_sparsity/linear_layer_sparsity": 0.5718980565705407, "compression/movement_sparsity/model_sparsity": 0.5522515989631345, "compression_loss": 64.79739379882812, "distillation_loss": 1.6359150409698486, "epoch": 2.8, "learning_rate": 3.9997182304874616e-05, "loss": 67.031, "step": 3313, "task_loss": 1.5530145168304443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6055387818137299, "compression/movement_sparsity/importance_threshold": -0.00280584136798719, "compression/movement_sparsity/linear_layer_sparsity": 0.5725603486893718, "compression/movement_sparsity/model_sparsity": 0.5528911393102348, "compression_loss": 64.84568786621094, "distillation_loss": 2.145482063293457, "epoch": 2.8, "learning_rate": 3.99924861463323e-05, "loss": 67.3609, "step": 3314, "task_loss": 1.6664559841156006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6059928814063333, "compression/movement_sparsity/importance_threshold": -0.0028026113130074602, "compression/movement_sparsity/linear_layer_sparsity": 0.5732501260146036, "compression/movement_sparsity/model_sparsity": 0.5535572206623417, "compression_loss": 64.89391326904297, "distillation_loss": 1.1197469234466553, "epoch": 2.8, "learning_rate": 3.998778998778999e-05, "loss": 66.6901, "step": 3315, "task_loss": 0.548944890499115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6064466323620304, "compression/movement_sparsity/importance_threshold": -0.0027993837379162697, "compression/movement_sparsity/linear_layer_sparsity": 0.573735380016545, "compression/movement_sparsity/model_sparsity": 0.5540258046965005, "compression_loss": 64.94212341308594, "distillation_loss": 2.353243112564087, "epoch": 2.8, "learning_rate": 3.9983093829247675e-05, "loss": 67.0707, "step": 3316, "task_loss": 1.7454273700714111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6069000348147066, "compression/movement_sparsity/importance_threshold": -0.0027961586417612786, "compression/movement_sparsity/linear_layer_sparsity": 0.5743789869646907, "compression/movement_sparsity/model_sparsity": 0.5546473017660108, "compression_loss": 64.99030303955078, "distillation_loss": 1.3296220302581787, "epoch": 2.8, "learning_rate": 3.997839767070536e-05, "loss": 66.7144, "step": 3317, "task_loss": 1.0979293584823608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6073530888982466, "compression/movement_sparsity/importance_threshold": -0.0027929360235901517, "compression/movement_sparsity/linear_layer_sparsity": 0.5750023109036878, "compression/movement_sparsity/model_sparsity": 0.5552492126101347, "compression_loss": 65.03843688964844, "distillation_loss": 1.4767000675201416, "epoch": 2.8, "learning_rate": 3.9973701512163055e-05, "loss": 66.8821, "step": 3318, "task_loss": 1.359595537185669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6078057947465363, "compression/movement_sparsity/importance_threshold": -0.002789715882450545, "compression/movement_sparsity/linear_layer_sparsity": 0.5756070808378435, "compression/movement_sparsity/model_sparsity": 0.5558332068365622, "compression_loss": 65.08648681640625, "distillation_loss": 1.8315837383270264, "epoch": 2.81, "learning_rate": 3.996900535362074e-05, "loss": 66.9316, "step": 3319, "task_loss": 1.1991068124771118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6082581524934598, "compression/movement_sparsity/importance_threshold": -0.0027864982173901277, "compression/movement_sparsity/linear_layer_sparsity": 0.5762376904432661, "compression/movement_sparsity/model_sparsity": 0.5564421530620565, "compression_loss": 65.13450622558594, "distillation_loss": 2.8331384658813477, "epoch": 2.81, "learning_rate": 3.996430919507843e-05, "loss": 67.5577, "step": 3320, "task_loss": 1.3014062643051147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6087101622729034, "compression/movement_sparsity/importance_threshold": -0.0027832830274565536, "compression/movement_sparsity/linear_layer_sparsity": 0.5768475996936729, "compression/movement_sparsity/model_sparsity": 0.5570311100534114, "compression_loss": 65.18254852294922, "distillation_loss": 1.8426620960235596, "epoch": 2.81, "learning_rate": 3.9959613036536114e-05, "loss": 67.2063, "step": 3321, "task_loss": 1.0427974462509155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6091618242187513, "compression/movement_sparsity/importance_threshold": -0.0027800703116974924, "compression/movement_sparsity/linear_layer_sparsity": 0.577359325347766, "compression/movement_sparsity/model_sparsity": 0.5575252563570342, "compression_loss": 65.23052215576172, "distillation_loss": 1.5088491439819336, "epoch": 2.81, "learning_rate": 3.99549168779938e-05, "loss": 67.4147, "step": 3322, "task_loss": 0.9442676305770874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6096131384648894, "compression/movement_sparsity/importance_threshold": -0.0027768600691606, "compression/movement_sparsity/linear_layer_sparsity": 0.5779616031308857, "compression/movement_sparsity/model_sparsity": 0.5581068440454806, "compression_loss": 65.27845764160156, "distillation_loss": 1.4766407012939453, "epoch": 2.81, "learning_rate": 3.995022071945149e-05, "loss": 67.0065, "step": 3323, "task_loss": 0.565028965473175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.610064105145202, "compression/movement_sparsity/importance_threshold": -0.0027736522988935424, "compression/movement_sparsity/linear_layer_sparsity": 0.5785270233118431, "compression/movement_sparsity/model_sparsity": 0.5586528403037861, "compression_loss": 65.32637786865234, "distillation_loss": 2.480224370956421, "epoch": 2.81, "learning_rate": 3.994552456090917e-05, "loss": 67.1116, "step": 3324, "task_loss": 1.2669494152069092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.610514724393575, "compression/movement_sparsity/importance_threshold": -0.0027704469999439785, "compression/movement_sparsity/linear_layer_sparsity": 0.5789979325401187, "compression/movement_sparsity/model_sparsity": 0.5591075723513842, "compression_loss": 65.37425994873047, "distillation_loss": 1.2833244800567627, "epoch": 2.81, "learning_rate": 3.9940828402366866e-05, "loss": 67.0767, "step": 3325, "task_loss": 1.1616863012313843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6109649963438932, "compression/movement_sparsity/importance_threshold": -0.002767244171359571, "compression/movement_sparsity/linear_layer_sparsity": 0.579469533370117, "compression/movement_sparsity/model_sparsity": 0.5595629722420582, "compression_loss": 65.4220962524414, "distillation_loss": 2.1778852939605713, "epoch": 2.81, "learning_rate": 3.993613224382455e-05, "loss": 67.3981, "step": 3326, "task_loss": 1.457416296005249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6114149211300417, "compression/movement_sparsity/importance_threshold": -0.0027640438121879834, "compression/movement_sparsity/linear_layer_sparsity": 0.5800922134040618, "compression/movement_sparsity/model_sparsity": 0.5601642613012492, "compression_loss": 65.46990203857422, "distillation_loss": 1.0909100770950317, "epoch": 2.81, "learning_rate": 3.9931436085282246e-05, "loss": 67.0374, "step": 3327, "task_loss": 0.14814597368240356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6118644988859057, "compression/movement_sparsity/importance_threshold": -0.002760845921476876, "compression/movement_sparsity/linear_layer_sparsity": 0.5808753211893795, "compression/movement_sparsity/model_sparsity": 0.5609204669250198, "compression_loss": 65.51768493652344, "distillation_loss": 2.714003562927246, "epoch": 2.81, "learning_rate": 3.992673992673993e-05, "loss": 67.6641, "step": 3328, "task_loss": 1.9574023485183716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6123137297453702, "compression/movement_sparsity/importance_threshold": -0.0027576504982739112, "compression/movement_sparsity/linear_layer_sparsity": 0.5814511630928505, "compression/movement_sparsity/model_sparsity": 0.5614765268876099, "compression_loss": 65.56544494628906, "distillation_loss": 1.8971818685531616, "epoch": 2.81, "learning_rate": 3.992204376819761e-05, "loss": 67.3975, "step": 3329, "task_loss": 1.7546738386154175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.612762613842321, "compression/movement_sparsity/importance_threshold": -0.002754457541626748, "compression/movement_sparsity/linear_layer_sparsity": 0.5820009626142049, "compression/movement_sparsity/model_sparsity": 0.5620074391040245, "compression_loss": 65.6131591796875, "distillation_loss": 3.002927541732788, "epoch": 2.81, "learning_rate": 3.9917347609655305e-05, "loss": 68.8264, "step": 3330, "task_loss": 0.8843226432800293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6132111513106422, "compression/movement_sparsity/importance_threshold": -0.002751267050583054, "compression/movement_sparsity/linear_layer_sparsity": 0.5827168777148944, "compression/movement_sparsity/model_sparsity": 0.5626987603185929, "compression_loss": 65.66082763671875, "distillation_loss": 1.7020246982574463, "epoch": 2.82, "learning_rate": 3.991265145111299e-05, "loss": 67.7427, "step": 3331, "task_loss": 0.5228107571601868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6136593422842198, "compression/movement_sparsity/importance_threshold": -0.002748079024190485, "compression/movement_sparsity/linear_layer_sparsity": 0.5833049895868628, "compression/movement_sparsity/model_sparsity": 0.5632666687385155, "compression_loss": 65.70848083496094, "distillation_loss": 3.7839038372039795, "epoch": 2.82, "learning_rate": 3.9907955292570684e-05, "loss": 68.099, "step": 3332, "task_loss": 2.2330870628356934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6141071868969384, "compression/movement_sparsity/importance_threshold": -0.002744893461496708, "compression/movement_sparsity/linear_layer_sparsity": 0.5838278643376955, "compression/movement_sparsity/model_sparsity": 0.5637715811331061, "compression_loss": 65.75606536865234, "distillation_loss": 2.5506443977355957, "epoch": 2.82, "learning_rate": 3.9903259134028364e-05, "loss": 68.0482, "step": 3333, "task_loss": 2.2558441162109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6145546852826838, "compression/movement_sparsity/importance_threshold": -0.0027417103615493803, "compression/movement_sparsity/linear_layer_sparsity": 0.5844740349818829, "compression/movement_sparsity/model_sparsity": 0.5643955538278123, "compression_loss": 65.80366516113281, "distillation_loss": 2.8471179008483887, "epoch": 2.82, "learning_rate": 3.989856297548606e-05, "loss": 68.0612, "step": 3334, "task_loss": 2.082033634185791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6150018375753401, "compression/movement_sparsity/importance_threshold": -0.0027385297233961685, "compression/movement_sparsity/linear_layer_sparsity": 0.5850039211432854, "compression/movement_sparsity/model_sparsity": 0.5649072367694501, "compression_loss": 65.85118865966797, "distillation_loss": 1.764566421508789, "epoch": 2.82, "learning_rate": 3.989386681694374e-05, "loss": 67.775, "step": 3335, "task_loss": 2.328819513320923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6154486439087934, "compression/movement_sparsity/importance_threshold": -0.00273535154608473, "compression/movement_sparsity/linear_layer_sparsity": 0.5855482474716949, "compression/movement_sparsity/model_sparsity": 0.5654328638139351, "compression_loss": 65.89869689941406, "distillation_loss": 3.255760431289673, "epoch": 2.82, "learning_rate": 3.988917065840143e-05, "loss": 68.3812, "step": 3336, "task_loss": 1.6573933362960815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6158951044169283, "compression/movement_sparsity/importance_threshold": -0.0027321758286627303, "compression/movement_sparsity/linear_layer_sparsity": 0.5860666148871612, "compression/movement_sparsity/model_sparsity": 0.5659334237139954, "compression_loss": 65.9461669921875, "distillation_loss": 3.451690912246704, "epoch": 2.82, "learning_rate": 3.9884474499859116e-05, "loss": 68.7225, "step": 3337, "task_loss": 1.6559584140777588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6163412192336302, "compression/movement_sparsity/importance_threshold": -0.002729002570177829, "compression/movement_sparsity/linear_layer_sparsity": 0.5865577713520823, "compression/movement_sparsity/model_sparsity": 0.5664077074433724, "compression_loss": 65.99361419677734, "distillation_loss": 2.3687052726745605, "epoch": 2.82, "learning_rate": 3.98797783413168e-05, "loss": 68.0487, "step": 3338, "task_loss": 1.9508813619613647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6167869884927843, "compression/movement_sparsity/importance_threshold": -0.0027258317696776868, "compression/movement_sparsity/linear_layer_sparsity": 0.5872615714984538, "compression/movement_sparsity/model_sparsity": 0.5670873298895737, "compression_loss": 66.041015625, "distillation_loss": 2.99855375289917, "epoch": 2.82, "learning_rate": 3.9875082182774495e-05, "loss": 68.1377, "step": 3339, "task_loss": 1.7606031894683838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6172324123282753, "compression/movement_sparsity/importance_threshold": -0.0027226634262099694, "compression/movement_sparsity/linear_layer_sparsity": 0.587671619775116, "compression/movement_sparsity/model_sparsity": 0.5674832917464763, "compression_loss": 66.08838653564453, "distillation_loss": 1.3146207332611084, "epoch": 2.82, "learning_rate": 3.987038602423218e-05, "loss": 67.7769, "step": 3340, "task_loss": 0.7704296112060547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.617677490873989, "compression/movement_sparsity/importance_threshold": -0.0027194975388223344, "compression/movement_sparsity/linear_layer_sparsity": 0.5882447191200307, "compression/movement_sparsity/model_sparsity": 0.5680367033658336, "compression_loss": 66.13570404052734, "distillation_loss": 1.7993332147598267, "epoch": 2.82, "learning_rate": 3.986568986568987e-05, "loss": 68.1525, "step": 3341, "task_loss": 0.668684184551239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6181222242638098, "compression/movement_sparsity/importance_threshold": -0.002716334106562449, "compression/movement_sparsity/linear_layer_sparsity": 0.5888749352279214, "compression/movement_sparsity/model_sparsity": 0.5686452696116466, "compression_loss": 66.18296813964844, "distillation_loss": 2.841013193130493, "epoch": 2.82, "learning_rate": 3.9860993707147554e-05, "loss": 68.3537, "step": 3342, "task_loss": 1.5665628910064697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6185666126316237, "compression/movement_sparsity/importance_threshold": -0.002713173128477967, "compression/movement_sparsity/linear_layer_sparsity": 0.5894859415017507, "compression/movement_sparsity/model_sparsity": 0.5692352859402947, "compression_loss": 66.23020935058594, "distillation_loss": 2.5057520866394043, "epoch": 2.83, "learning_rate": 3.985629754860524e-05, "loss": 68.4157, "step": 3343, "task_loss": 2.184875726699829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6190106561113149, "compression/movement_sparsity/importance_threshold": -0.0027100146036165597, "compression/movement_sparsity/linear_layer_sparsity": 0.5901319213592559, "compression/movement_sparsity/model_sparsity": 0.5698590744024281, "compression_loss": 66.27739715576172, "distillation_loss": 1.1512799263000488, "epoch": 2.83, "learning_rate": 3.9851601390062934e-05, "loss": 67.9809, "step": 3344, "task_loss": 0.4081193208694458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6194543548367692, "compression/movement_sparsity/importance_threshold": -0.0027068585310258825, "compression/movement_sparsity/linear_layer_sparsity": 0.5907624474955051, "compression/movement_sparsity/model_sparsity": 0.5704679400261718, "compression_loss": 66.32457733154297, "distillation_loss": 1.698883056640625, "epoch": 2.83, "learning_rate": 3.984690523152062e-05, "loss": 68.7142, "step": 3345, "task_loss": 0.8805169463157654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6198977089418716, "compression/movement_sparsity/importance_threshold": -0.0027037049097535983, "compression/movement_sparsity/linear_layer_sparsity": 0.5912691292266882, "compression/movement_sparsity/model_sparsity": 0.5709572156811534, "compression_loss": 66.37174224853516, "distillation_loss": 2.839810609817505, "epoch": 2.83, "learning_rate": 3.9842209072978306e-05, "loss": 68.9747, "step": 3346, "task_loss": 1.4711060523986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6203407185605072, "compression/movement_sparsity/importance_threshold": -0.00270055373884737, "compression/movement_sparsity/linear_layer_sparsity": 0.5918547847201235, "compression/movement_sparsity/model_sparsity": 0.5715227521067023, "compression_loss": 66.41877746582031, "distillation_loss": 2.0519232749938965, "epoch": 2.83, "learning_rate": 3.983751291443599e-05, "loss": 68.5831, "step": 3347, "task_loss": 2.003737211227417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6207833838265611, "compression/movement_sparsity/importance_threshold": -0.0026974050173548597, "compression/movement_sparsity/linear_layer_sparsity": 0.5923398479353829, "compression/movement_sparsity/model_sparsity": 0.5719911519082884, "compression_loss": 66.4658432006836, "distillation_loss": 3.3370680809020996, "epoch": 2.83, "learning_rate": 3.983281675589368e-05, "loss": 68.6563, "step": 3348, "task_loss": 2.23791766166687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6212257048739183, "compression/movement_sparsity/importance_threshold": -0.002694258744323729, "compression/movement_sparsity/linear_layer_sparsity": 0.5928152644990247, "compression/movement_sparsity/model_sparsity": 0.5724502364504168, "compression_loss": 66.51287841796875, "distillation_loss": 1.8897274732589722, "epoch": 2.83, "learning_rate": 3.982812059735137e-05, "loss": 68.7601, "step": 3349, "task_loss": 1.6552172899246216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6216676818364644, "compression/movement_sparsity/importance_threshold": -0.0026911149188016377, "compression/movement_sparsity/linear_layer_sparsity": 0.5933636927411009, "compression/movement_sparsity/model_sparsity": 0.5729798244952151, "compression_loss": 66.55982971191406, "distillation_loss": 1.4008245468139648, "epoch": 2.83, "learning_rate": 3.982342443880905e-05, "loss": 68.847, "step": 3350, "task_loss": 1.5050897598266602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.622109314848084, "compression/movement_sparsity/importance_threshold": -0.002687973539836252, "compression/movement_sparsity/linear_layer_sparsity": 0.5939198716921406, "compression/movement_sparsity/model_sparsity": 0.5735168969882799, "compression_loss": 66.60677337646484, "distillation_loss": 1.742423176765442, "epoch": 2.83, "learning_rate": 3.9818728280266745e-05, "loss": 68.947, "step": 3351, "task_loss": 2.5324747562408447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6225506040426627, "compression/movement_sparsity/importance_threshold": -0.002684834606475228, "compression/movement_sparsity/linear_layer_sparsity": 0.5943878475751776, "compression/movement_sparsity/model_sparsity": 0.5739687964600725, "compression_loss": 66.65365600585938, "distillation_loss": 1.7323476076126099, "epoch": 2.83, "learning_rate": 3.981403212172443e-05, "loss": 68.5194, "step": 3352, "task_loss": 1.0130680799484253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6229915495540852, "compression/movement_sparsity/importance_threshold": -0.002681698117766234, "compression/movement_sparsity/linear_layer_sparsity": 0.5948644923280859, "compression/movement_sparsity/model_sparsity": 0.5744290669993877, "compression_loss": 66.7005386352539, "distillation_loss": 2.964799404144287, "epoch": 2.83, "learning_rate": 3.980933596318212e-05, "loss": 69.13, "step": 3353, "task_loss": 1.9048162698745728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6234321515162371, "compression/movement_sparsity/importance_threshold": -0.002678564072756926, "compression/movement_sparsity/linear_layer_sparsity": 0.5955709992605108, "compression/movement_sparsity/model_sparsity": 0.5751113032452142, "compression_loss": 66.74736022949219, "distillation_loss": 1.523757815361023, "epoch": 2.83, "learning_rate": 3.9804639804639804e-05, "loss": 68.8175, "step": 3354, "task_loss": 1.5624064207077026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6238724100630031, "compression/movement_sparsity/importance_threshold": -0.0026754324704949704, "compression/movement_sparsity/linear_layer_sparsity": 0.5960954360773038, "compression/movement_sparsity/model_sparsity": 0.5756177240439939, "compression_loss": 66.79415893554688, "distillation_loss": 3.0026164054870605, "epoch": 2.84, "learning_rate": 3.979994364609749e-05, "loss": 69.1793, "step": 3355, "task_loss": 2.036043882369995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6243123253282686, "compression/movement_sparsity/importance_threshold": -0.002672303310028026, "compression/movement_sparsity/linear_layer_sparsity": 0.59661249183433, "compression/movement_sparsity/model_sparsity": 0.5761170173451168, "compression_loss": 66.84088897705078, "distillation_loss": 2.3569252490997314, "epoch": 2.84, "learning_rate": 3.979524748755518e-05, "loss": 69.1378, "step": 3356, "task_loss": 1.6887824535369873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6247518974459187, "compression/movement_sparsity/importance_threshold": -0.002669176590403756, "compression/movement_sparsity/linear_layer_sparsity": 0.5972492900827556, "compression/movement_sparsity/model_sparsity": 0.5767319396146884, "compression_loss": 66.88763427734375, "distillation_loss": 3.8131771087646484, "epoch": 2.84, "learning_rate": 3.979055132901287e-05, "loss": 69.5285, "step": 3357, "task_loss": 2.021353244781494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6251911265498385, "compression/movement_sparsity/importance_threshold": -0.0026660523106698227, "compression/movement_sparsity/linear_layer_sparsity": 0.5977927459469278, "compression/movement_sparsity/model_sparsity": 0.5772567260980603, "compression_loss": 66.93428039550781, "distillation_loss": 3.665510654449463, "epoch": 2.84, "learning_rate": 3.978585517047056e-05, "loss": 69.2757, "step": 3358, "task_loss": 2.4498085975646973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6256300127739135, "compression/movement_sparsity/importance_threshold": -0.0026629304698738846, "compression/movement_sparsity/linear_layer_sparsity": 0.5983763147110268, "compression/movement_sparsity/model_sparsity": 0.5778202474798452, "compression_loss": 66.9809341430664, "distillation_loss": 2.285398006439209, "epoch": 2.84, "learning_rate": 3.978115901192824e-05, "loss": 69.5359, "step": 3359, "task_loss": 1.9098994731903076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6260685562520281, "compression/movement_sparsity/importance_threshold": -0.0026598110670636096, "compression/movement_sparsity/linear_layer_sparsity": 0.5990043486962401, "compression/movement_sparsity/model_sparsity": 0.5784267065656079, "compression_loss": 67.02754974365234, "distillation_loss": 3.527888774871826, "epoch": 2.84, "learning_rate": 3.977646285338593e-05, "loss": 69.4285, "step": 3360, "task_loss": 2.369805097579956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6265067571180682, "compression/movement_sparsity/importance_threshold": -0.002656694101286653, "compression/movement_sparsity/linear_layer_sparsity": 0.5996639101806827, "compression/movement_sparsity/model_sparsity": 0.5790636100840113, "compression_loss": 67.07410430908203, "distillation_loss": 2.0617825984954834, "epoch": 2.84, "learning_rate": 3.977176669484362e-05, "loss": 69.5242, "step": 3361, "task_loss": 1.092995047569275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6269446155059183, "compression/movement_sparsity/importance_threshold": -0.0026535795715906824, "compression/movement_sparsity/linear_layer_sparsity": 0.6001333646605066, "compression/movement_sparsity/model_sparsity": 0.5795169373582424, "compression_loss": 67.12059020996094, "distillation_loss": 1.6198341846466064, "epoch": 2.84, "learning_rate": 3.976707053630131e-05, "loss": 68.7075, "step": 3362, "task_loss": 0.8860343098640442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6273821315494641, "compression/movement_sparsity/importance_threshold": -0.002650467477023355, "compression/movement_sparsity/linear_layer_sparsity": 0.6008661405342333, "compression/movement_sparsity/model_sparsity": 0.5802245401264243, "compression_loss": 67.16707611083984, "distillation_loss": 1.744773507118225, "epoch": 2.84, "learning_rate": 3.9762374377758994e-05, "loss": 69.8593, "step": 3363, "task_loss": 1.1081963777542114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6278193053825902, "compression/movement_sparsity/importance_threshold": -0.002647357816632336, "compression/movement_sparsity/linear_layer_sparsity": 0.6014273753323504, "compression/movement_sparsity/model_sparsity": 0.580766494782666, "compression_loss": 67.21350860595703, "distillation_loss": 2.9735140800476074, "epoch": 2.84, "learning_rate": 3.975767821921668e-05, "loss": 69.9228, "step": 3364, "task_loss": 2.2808563709259033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6282561371391822, "compression/movement_sparsity/importance_threshold": -0.0026442505894652855, "compression/movement_sparsity/linear_layer_sparsity": 0.6020124942382422, "compression/movement_sparsity/model_sparsity": 0.581331513054104, "compression_loss": 67.25992584228516, "distillation_loss": 1.7775824069976807, "epoch": 2.84, "learning_rate": 3.9752982060674374e-05, "loss": 69.783, "step": 3365, "task_loss": 1.2722235918045044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.628692626953125, "compression/movement_sparsity/importance_threshold": -0.0026411457945698658, "compression/movement_sparsity/linear_layer_sparsity": 0.6024416331072893, "compression/movement_sparsity/model_sparsity": 0.5817459096828138, "compression_loss": 67.3062515258789, "distillation_loss": 2.505228042602539, "epoch": 2.84, "learning_rate": 3.974828590213206e-05, "loss": 69.3925, "step": 3366, "task_loss": 2.21913480758667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6291287749583039, "compression/movement_sparsity/importance_threshold": -0.0026380434309937386, "compression/movement_sparsity/linear_layer_sparsity": 0.6030453775630283, "compression/movement_sparsity/model_sparsity": 0.582328913659163, "compression_loss": 67.3525619506836, "distillation_loss": 2.478205680847168, "epoch": 2.85, "learning_rate": 3.974358974358974e-05, "loss": 70.2019, "step": 3367, "task_loss": 1.8805465698242188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6295645812886038, "compression/movement_sparsity/importance_threshold": -0.002634943497784566, "compression/movement_sparsity/linear_layer_sparsity": 0.603683058199859, "compression/movement_sparsity/model_sparsity": 0.5829446880043835, "compression_loss": 67.39881896972656, "distillation_loss": 2.0793824195861816, "epoch": 2.85, "learning_rate": 3.973889358504743e-05, "loss": 70.176, "step": 3368, "task_loss": 1.823570728302002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6300000460779098, "compression/movement_sparsity/importance_threshold": -0.002631845993990011, "compression/movement_sparsity/linear_layer_sparsity": 0.6041317884763318, "compression/movement_sparsity/model_sparsity": 0.5833780030154037, "compression_loss": 67.44507598876953, "distillation_loss": 2.0275721549987793, "epoch": 2.85, "learning_rate": 3.973419742650512e-05, "loss": 69.6855, "step": 3369, "task_loss": 1.2901363372802734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6304351694601077, "compression/movement_sparsity/importance_threshold": -0.0026287509186577315, "compression/movement_sparsity/linear_layer_sparsity": 0.6047397063907434, "compression/movement_sparsity/model_sparsity": 0.5839650370792809, "compression_loss": 67.49124145507812, "distillation_loss": 1.168149709701538, "epoch": 2.85, "learning_rate": 3.972950126796281e-05, "loss": 69.5404, "step": 3370, "task_loss": 1.063902735710144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6308699515690818, "compression/movement_sparsity/importance_threshold": -0.0026256582708353944, "compression/movement_sparsity/linear_layer_sparsity": 0.605253900347537, "compression/movement_sparsity/model_sparsity": 0.5844615668918131, "compression_loss": 67.53739929199219, "distillation_loss": 2.2467079162597656, "epoch": 2.85, "learning_rate": 3.97248051094205e-05, "loss": 69.6796, "step": 3371, "task_loss": 2.579721212387085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6313043925387178, "compression/movement_sparsity/importance_threshold": -0.002622568049570657, "compression/movement_sparsity/linear_layer_sparsity": 0.6058142169847462, "compression/movement_sparsity/model_sparsity": 0.5850026349287986, "compression_loss": 67.58350372314453, "distillation_loss": 2.019651412963867, "epoch": 2.85, "learning_rate": 3.9720108950878185e-05, "loss": 69.9188, "step": 3372, "task_loss": 0.9294201135635376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6317384925029005, "compression/movement_sparsity/importance_threshold": -0.002619480253911186, "compression/movement_sparsity/linear_layer_sparsity": 0.6062763619498094, "compression/movement_sparsity/model_sparsity": 0.5854489037925875, "compression_loss": 67.62958526611328, "distillation_loss": 1.591545820236206, "epoch": 2.85, "learning_rate": 3.971541279233587e-05, "loss": 69.6759, "step": 3373, "task_loss": 1.6166478395462036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6321722515955155, "compression/movement_sparsity/importance_threshold": -0.0026163948829046367, "compression/movement_sparsity/linear_layer_sparsity": 0.6068646884567952, "compression/movement_sparsity/model_sparsity": 0.5860170194741544, "compression_loss": 67.67561340332031, "distillation_loss": 1.1338162422180176, "epoch": 2.85, "learning_rate": 3.971071663379356e-05, "loss": 70.1459, "step": 3374, "task_loss": 0.4718058109283447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6326056699504472, "compression/movement_sparsity/importance_threshold": -0.0026133119355986785, "compression/movement_sparsity/linear_layer_sparsity": 0.6074451927098119, "compression/movement_sparsity/model_sparsity": 0.58657758162024, "compression_loss": 67.72154998779297, "distillation_loss": 4.011053085327148, "epoch": 2.85, "learning_rate": 3.970602047525125e-05, "loss": 70.1638, "step": 3375, "task_loss": 2.3060996532440186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6330387477015813, "compression/movement_sparsity/importance_threshold": -0.002610231411040969, "compression/movement_sparsity/linear_layer_sparsity": 0.6079020075719418, "compression/movement_sparsity/model_sparsity": 0.5870187034865288, "compression_loss": 67.76750946044922, "distillation_loss": 2.068549156188965, "epoch": 2.85, "learning_rate": 3.970132431670893e-05, "loss": 69.8709, "step": 3376, "task_loss": 1.4367406368255615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6334714849828029, "compression/movement_sparsity/importance_threshold": -0.0026071533082791696, "compression/movement_sparsity/linear_layer_sparsity": 0.608514325504211, "compression/movement_sparsity/model_sparsity": 0.5876099864141143, "compression_loss": 67.81343078613281, "distillation_loss": 3.4722397327423096, "epoch": 2.85, "learning_rate": 3.9696628158166623e-05, "loss": 70.3399, "step": 3377, "task_loss": 2.260223150253296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6339038819279967, "compression/movement_sparsity/importance_threshold": -0.0026040776263609455, "compression/movement_sparsity/linear_layer_sparsity": 0.6091262499389483, "compression/movement_sparsity/model_sparsity": 0.5882008893620186, "compression_loss": 67.85929870605469, "distillation_loss": 2.899590492248535, "epoch": 2.85, "learning_rate": 3.969193199962431e-05, "loss": 70.2612, "step": 3378, "task_loss": 1.2932850122451782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6343359386710488, "compression/movement_sparsity/importance_threshold": -0.0026010043643339533, "compression/movement_sparsity/linear_layer_sparsity": 0.609612445950133, "compression/movement_sparsity/model_sparsity": 0.5886703830445053, "compression_loss": 67.90515899658203, "distillation_loss": 1.886909008026123, "epoch": 2.86, "learning_rate": 3.9687235841081996e-05, "loss": 70.2219, "step": 3379, "task_loss": 1.5921717882156372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6347676553458432, "compression/movement_sparsity/importance_threshold": -0.002597933521245859, "compression/movement_sparsity/linear_layer_sparsity": 0.6101647256983556, "compression/movement_sparsity/model_sparsity": 0.5892036902843651, "compression_loss": 67.95095825195312, "distillation_loss": 1.7081815004348755, "epoch": 2.86, "learning_rate": 3.968253968253968e-05, "loss": 70.1, "step": 3380, "task_loss": 1.2063145637512207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6351990320862659, "compression/movement_sparsity/importance_threshold": -0.0025948650961443226, "compression/movement_sparsity/linear_layer_sparsity": 0.610690128372727, "compression/movement_sparsity/model_sparsity": 0.5897110437605442, "compression_loss": 67.99675750732422, "distillation_loss": 2.883063793182373, "epoch": 2.86, "learning_rate": 3.967784352399737e-05, "loss": 70.1644, "step": 3381, "task_loss": 2.098947763442993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6356300690262014, "compression/movement_sparsity/importance_threshold": -0.002591799088077009, "compression/movement_sparsity/linear_layer_sparsity": 0.6111983483215351, "compression/movement_sparsity/model_sparsity": 0.5902018047906433, "compression_loss": 68.04248809814453, "distillation_loss": 2.0676727294921875, "epoch": 2.86, "learning_rate": 3.967314736545506e-05, "loss": 70.3135, "step": 3382, "task_loss": 1.1000292301177979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6360607662995356, "compression/movement_sparsity/importance_threshold": -0.002588735496091574, "compression/movement_sparsity/linear_layer_sparsity": 0.611732348320772, "compression/movement_sparsity/model_sparsity": 0.5907174602471302, "compression_loss": 68.08821868896484, "distillation_loss": 1.4434975385665894, "epoch": 2.86, "learning_rate": 3.966845120691275e-05, "loss": 70.3087, "step": 3383, "task_loss": 0.7188364863395691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6364911240401527, "compression/movement_sparsity/importance_threshold": -0.002585674319235686, "compression/movement_sparsity/linear_layer_sparsity": 0.6122613282454341, "compression/movement_sparsity/model_sparsity": 0.5912282680840477, "compression_loss": 68.1338882446289, "distillation_loss": 2.605797290802002, "epoch": 2.86, "learning_rate": 3.9663755048370435e-05, "loss": 70.4476, "step": 3384, "task_loss": 2.1669974327087402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6369211423819386, "compression/movement_sparsity/importance_threshold": -0.0025826155565570027, "compression/movement_sparsity/linear_layer_sparsity": 0.6129000224365139, "compression/movement_sparsity/model_sparsity": 0.5918450211648106, "compression_loss": 68.17952728271484, "distillation_loss": 2.3394858837127686, "epoch": 2.86, "learning_rate": 3.965905888982812e-05, "loss": 70.4723, "step": 3385, "task_loss": 1.350982666015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6373508214587782, "compression/movement_sparsity/importance_threshold": -0.0025795592071031874, "compression/movement_sparsity/linear_layer_sparsity": 0.6134875381001005, "compression/movement_sparsity/model_sparsity": 0.5924123538579434, "compression_loss": 68.22509002685547, "distillation_loss": 1.9900925159454346, "epoch": 2.86, "learning_rate": 3.965436273128581e-05, "loss": 70.1343, "step": 3386, "task_loss": 1.4895490407943726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6377801614045565, "compression/movement_sparsity/importance_threshold": -0.0025765052699219023, "compression/movement_sparsity/linear_layer_sparsity": 0.6140598862224542, "compression/movement_sparsity/model_sparsity": 0.5929650400615456, "compression_loss": 68.27062225341797, "distillation_loss": 2.4864752292633057, "epoch": 2.86, "learning_rate": 3.96496665727435e-05, "loss": 70.5995, "step": 3387, "task_loss": 2.3740789890289307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6382091623531588, "compression/movement_sparsity/importance_threshold": -0.0025734537440608076, "compression/movement_sparsity/linear_layer_sparsity": 0.614602698181574, "compression/movement_sparsity/model_sparsity": 0.5934892047599848, "compression_loss": 68.31611633300781, "distillation_loss": 2.0325350761413574, "epoch": 2.86, "learning_rate": 3.964497041420119e-05, "loss": 70.6694, "step": 3388, "task_loss": 1.450438141822815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6386378244384701, "compression/movement_sparsity/importance_threshold": -0.0025704046285675687, "compression/movement_sparsity/linear_layer_sparsity": 0.6151505659877713, "compression/movement_sparsity/model_sparsity": 0.5940182516216007, "compression_loss": 68.36156463623047, "distillation_loss": 2.8485896587371826, "epoch": 2.86, "learning_rate": 3.964027425565887e-05, "loss": 71.1179, "step": 3389, "task_loss": 1.9161099195480347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6390661477943758, "compression/movement_sparsity/importance_threshold": -0.0025673579224898425, "compression/movement_sparsity/linear_layer_sparsity": 0.6157506974207166, "compression/movement_sparsity/model_sparsity": 0.5945977666936042, "compression_loss": 68.40697479248047, "distillation_loss": 2.1635546684265137, "epoch": 2.87, "learning_rate": 3.963557809711656e-05, "loss": 70.7693, "step": 3390, "task_loss": 1.689217448234558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6394941325547607, "compression/movement_sparsity/importance_threshold": -0.0025643136248752942, "compression/movement_sparsity/linear_layer_sparsity": 0.6163002226862154, "compression/movement_sparsity/model_sparsity": 0.5951284140756956, "compression_loss": 68.45235443115234, "distillation_loss": 2.0831942558288574, "epoch": 2.87, "learning_rate": 3.9630881938574246e-05, "loss": 70.4039, "step": 3391, "task_loss": 0.7912447452545166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6399217788535103, "compression/movement_sparsity/importance_threshold": -0.0025612717347715842, "compression/movement_sparsity/linear_layer_sparsity": 0.6169331575043271, "compression/movement_sparsity/model_sparsity": 0.5957396056356697, "compression_loss": 68.49768829345703, "distillation_loss": 2.2185676097869873, "epoch": 2.87, "learning_rate": 3.962618578003194e-05, "loss": 70.7488, "step": 3392, "task_loss": 1.3113499879837036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6403490868245094, "compression/movement_sparsity/importance_threshold": -0.002558232251226376, "compression/movement_sparsity/linear_layer_sparsity": 0.6174881321144354, "compression/movement_sparsity/model_sparsity": 0.5962755151606193, "compression_loss": 68.54295349121094, "distillation_loss": 1.7527161836624146, "epoch": 2.87, "learning_rate": 3.962148962148962e-05, "loss": 71.082, "step": 3393, "task_loss": 1.2159423828125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6407760566016435, "compression/movement_sparsity/importance_threshold": -0.0025551951732873295, "compression/movement_sparsity/linear_layer_sparsity": 0.6179095083503515, "compression/movement_sparsity/model_sparsity": 0.5966824158265268, "compression_loss": 68.5882339477539, "distillation_loss": 1.3712267875671387, "epoch": 2.87, "learning_rate": 3.961679346294731e-05, "loss": 70.5116, "step": 3394, "task_loss": 0.7577222585678101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6412026883187972, "compression/movement_sparsity/importance_threshold": -0.0025521605000021087, "compression/movement_sparsity/linear_layer_sparsity": 0.6185576226338636, "compression/movement_sparsity/model_sparsity": 0.5973082653905675, "compression_loss": 68.63345336914062, "distillation_loss": 2.993063449859619, "epoch": 2.87, "learning_rate": 3.9612097304405e-05, "loss": 71.2099, "step": 3395, "task_loss": 1.479042410850525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6416289821098562, "compression/movement_sparsity/importance_threshold": -0.002549128230418374, "compression/movement_sparsity/linear_layer_sparsity": 0.6190812128347543, "compression/movement_sparsity/model_sparsity": 0.5978138686573058, "compression_loss": 68.67863464355469, "distillation_loss": 2.2189247608184814, "epoch": 2.87, "learning_rate": 3.960740114586269e-05, "loss": 71.4385, "step": 3396, "task_loss": 1.3366414308547974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6420549381087054, "compression/movement_sparsity/importance_threshold": -0.0025460983635837865, "compression/movement_sparsity/linear_layer_sparsity": 0.619785466099496, "compression/movement_sparsity/model_sparsity": 0.5984939286558671, "compression_loss": 68.7237548828125, "distillation_loss": 4.362691879272461, "epoch": 2.87, "learning_rate": 3.960270498732037e-05, "loss": 71.401, "step": 3397, "task_loss": 1.9793548583984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6424805564492297, "compression/movement_sparsity/importance_threshold": -0.0025430708985460107, "compression/movement_sparsity/linear_layer_sparsity": 0.6201439781236359, "compression/movement_sparsity/model_sparsity": 0.5988401246890658, "compression_loss": 68.76881408691406, "distillation_loss": 1.6483399868011475, "epoch": 2.87, "learning_rate": 3.9598008828778064e-05, "loss": 71.2065, "step": 3398, "task_loss": 1.2753628492355347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.642905837265315, "compression/movement_sparsity/importance_threshold": -0.0025400458343527044, "compression/movement_sparsity/linear_layer_sparsity": 0.6206104634857185, "compression/movement_sparsity/model_sparsity": 0.599290584843884, "compression_loss": 68.81392669677734, "distillation_loss": 2.7444567680358887, "epoch": 2.87, "learning_rate": 3.959331267023575e-05, "loss": 71.0334, "step": 3399, "task_loss": 1.9229516983032227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6433307806908455, "compression/movement_sparsity/importance_threshold": -0.0025370231700515347, "compression/movement_sparsity/linear_layer_sparsity": 0.6211119343556447, "compression/movement_sparsity/model_sparsity": 0.5997748286467235, "compression_loss": 68.85897827148438, "distillation_loss": 2.304443359375, "epoch": 2.87, "learning_rate": 3.9588616511693436e-05, "loss": 71.2507, "step": 3400, "task_loss": 2.254709482192993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6437553868597072, "compression/movement_sparsity/importance_threshold": -0.0025340029046901575, "compression/movement_sparsity/linear_layer_sparsity": 0.6216746477505487, "compression/movement_sparsity/model_sparsity": 0.6003182111054036, "compression_loss": 68.90399169921875, "distillation_loss": 2.9952778816223145, "epoch": 2.87, "learning_rate": 3.958392035315112e-05, "loss": 71.1499, "step": 3401, "task_loss": 1.6165544986724854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6441796559057844, "compression/movement_sparsity/importance_threshold": -0.00253098503731624, "compression/movement_sparsity/linear_layer_sparsity": 0.6221724579010107, "compression/movement_sparsity/model_sparsity": 0.6007989199457541, "compression_loss": 68.94898986816406, "distillation_loss": 2.4581074714660645, "epoch": 2.88, "learning_rate": 3.957922419460881e-05, "loss": 71.4093, "step": 3402, "task_loss": 1.5678703784942627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.644603587962963, "compression/movement_sparsity/importance_threshold": -0.0025279695669774404, "compression/movement_sparsity/linear_layer_sparsity": 0.6228078014009848, "compression/movement_sparsity/model_sparsity": 0.6014124374419588, "compression_loss": 68.99390411376953, "distillation_loss": 3.2353034019470215, "epoch": 2.88, "learning_rate": 3.95745280360665e-05, "loss": 71.5503, "step": 3403, "task_loss": 2.9008982181549072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6450271831651275, "compression/movement_sparsity/importance_threshold": -0.0025249564927214236, "compression/movement_sparsity/linear_layer_sparsity": 0.6233159259564518, "compression/movement_sparsity/model_sparsity": 0.6019031063557716, "compression_loss": 69.03880310058594, "distillation_loss": 1.7541284561157227, "epoch": 2.88, "learning_rate": 3.956983187752419e-05, "loss": 71.1839, "step": 3404, "task_loss": 1.8149183988571167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6454504416461635, "compression/movement_sparsity/importance_threshold": -0.002521945813595849, "compression/movement_sparsity/linear_layer_sparsity": 0.6238100396149466, "compression/movement_sparsity/model_sparsity": 0.6023802456900258, "compression_loss": 69.08366394042969, "distillation_loss": 2.8180577754974365, "epoch": 2.88, "learning_rate": 3.9565135718981875e-05, "loss": 72.2597, "step": 3405, "task_loss": 1.4228739738464355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6458733635399561, "compression/movement_sparsity/importance_threshold": -0.0025189375286483783, "compression/movement_sparsity/linear_layer_sparsity": 0.6243444927325537, "compression/movement_sparsity/model_sparsity": 0.6028963386988729, "compression_loss": 69.12847900390625, "distillation_loss": 1.888380765914917, "epoch": 2.88, "learning_rate": 3.956043956043956e-05, "loss": 71.2641, "step": 3406, "task_loss": 1.4192883968353271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6462959489803901, "compression/movement_sparsity/importance_threshold": -0.0025159316369266754, "compression/movement_sparsity/linear_layer_sparsity": 0.624861381551233, "compression/movement_sparsity/model_sparsity": 0.6033954707964947, "compression_loss": 69.17327117919922, "distillation_loss": 3.7989726066589355, "epoch": 2.88, "learning_rate": 3.955574340189725e-05, "loss": 71.9687, "step": 3407, "task_loss": 1.3938785791397095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.646718198101351, "compression/movement_sparsity/importance_threshold": -0.0025129281374784004, "compression/movement_sparsity/linear_layer_sparsity": 0.6253508924810205, "compression/movement_sparsity/model_sparsity": 0.6038681655199322, "compression_loss": 69.218017578125, "distillation_loss": 3.3060951232910156, "epoch": 2.88, "learning_rate": 3.955104724335494e-05, "loss": 71.6878, "step": 3408, "task_loss": 2.132164716720581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6471401110367236, "compression/movement_sparsity/importance_threshold": -0.002509927029351217, "compression/movement_sparsity/linear_layer_sparsity": 0.6259218454757609, "compression/movement_sparsity/model_sparsity": 0.6044195045228463, "compression_loss": 69.26273345947266, "distillation_loss": 2.484198808670044, "epoch": 2.88, "learning_rate": 3.954635108481263e-05, "loss": 71.847, "step": 3409, "task_loss": 1.1121826171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6475616879203937, "compression/movement_sparsity/importance_threshold": -0.002506928311592783, "compression/movement_sparsity/linear_layer_sparsity": 0.6265202479043991, "compression/movement_sparsity/model_sparsity": 0.6049973499871596, "compression_loss": 69.30743408203125, "distillation_loss": 2.921623706817627, "epoch": 2.88, "learning_rate": 3.954165492627031e-05, "loss": 71.79, "step": 3410, "task_loss": 1.2422878742218018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6479829288862455, "compression/movement_sparsity/importance_threshold": -0.0025039319832507664, "compression/movement_sparsity/linear_layer_sparsity": 0.6270705005441236, "compression/movement_sparsity/model_sparsity": 0.6055286997559345, "compression_loss": 69.35213470458984, "distillation_loss": 2.282045841217041, "epoch": 2.88, "learning_rate": 3.9536958767728e-05, "loss": 72.6621, "step": 3411, "task_loss": 1.4272512197494507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.648403834068165, "compression/movement_sparsity/importance_threshold": -0.002500938043372824, "compression/movement_sparsity/linear_layer_sparsity": 0.6275621339757502, "compression/movement_sparsity/model_sparsity": 0.6060034440667433, "compression_loss": 69.39673614501953, "distillation_loss": 3.643188238143921, "epoch": 2.88, "learning_rate": 3.9532262609185686e-05, "loss": 72.0167, "step": 3412, "task_loss": 2.496001720428467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6488244036000367, "compression/movement_sparsity/importance_threshold": -0.002497946491006621, "compression/movement_sparsity/linear_layer_sparsity": 0.6280036501308034, "compression/movement_sparsity/model_sparsity": 0.6064297927836079, "compression_loss": 69.44134521484375, "distillation_loss": 2.2212557792663574, "epoch": 2.88, "learning_rate": 3.952756645064338e-05, "loss": 72.4427, "step": 3413, "task_loss": 1.879401445388794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6492446376157464, "compression/movement_sparsity/importance_threshold": -0.002494957325199814, "compression/movement_sparsity/linear_layer_sparsity": 0.628500947542057, "compression/movement_sparsity/model_sparsity": 0.6069100064989191, "compression_loss": 69.48588562011719, "distillation_loss": 3.1814351081848145, "epoch": 2.89, "learning_rate": 3.952287029210106e-05, "loss": 72.6178, "step": 3414, "task_loss": 2.4882233142852783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6496645362491783, "compression/movement_sparsity/importance_threshold": -0.0024919705450000725, "compression/movement_sparsity/linear_layer_sparsity": 0.6290928512993336, "compression/movement_sparsity/model_sparsity": 0.6074815765412244, "compression_loss": 69.53041076660156, "distillation_loss": 3.5423412322998047, "epoch": 2.89, "learning_rate": 3.951817413355875e-05, "loss": 72.6978, "step": 3415, "task_loss": 2.9733970165252686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6500840996342183, "compression/movement_sparsity/importance_threshold": -0.0024889861494550537, "compression/movement_sparsity/linear_layer_sparsity": 0.6296285803028777, "compression/movement_sparsity/model_sparsity": 0.6079989016054015, "compression_loss": 69.57489776611328, "distillation_loss": 2.893096685409546, "epoch": 2.89, "learning_rate": 3.951347797501644e-05, "loss": 71.9816, "step": 3416, "task_loss": 1.4074283838272095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6505033279047514, "compression/movement_sparsity/importance_threshold": -0.002486004137612419, "compression/movement_sparsity/linear_layer_sparsity": 0.6301756134173405, "compression/movement_sparsity/model_sparsity": 0.6085271424495119, "compression_loss": 69.61932373046875, "distillation_loss": 2.069347858428955, "epoch": 2.89, "learning_rate": 3.9508781816474125e-05, "loss": 72.1268, "step": 3417, "task_loss": 1.6884444952011108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6509222211946627, "compression/movement_sparsity/importance_threshold": -0.0024830245085198316, "compression/movement_sparsity/linear_layer_sparsity": 0.6307846879760127, "compression/movement_sparsity/model_sparsity": 0.6091152934233612, "compression_loss": 69.66372680664062, "distillation_loss": 5.3722147941589355, "epoch": 2.89, "learning_rate": 3.950408565793182e-05, "loss": 72.8052, "step": 3418, "task_loss": 3.304370641708374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6513407796378373, "compression/movement_sparsity/importance_threshold": -0.0024800472612249522, "compression/movement_sparsity/linear_layer_sparsity": 0.6312222572315784, "compression/movement_sparsity/model_sparsity": 0.6095378308288777, "compression_loss": 69.7081069946289, "distillation_loss": 2.6325674057006836, "epoch": 2.89, "learning_rate": 3.94993894993895e-05, "loss": 72.0141, "step": 3419, "task_loss": 1.5015544891357422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6517590033681602, "compression/movement_sparsity/importance_threshold": -0.002477072394775446, "compression/movement_sparsity/linear_layer_sparsity": 0.6317996969735127, "compression/movement_sparsity/model_sparsity": 0.6100954337392641, "compression_loss": 69.75243377685547, "distillation_loss": 2.7297139167785645, "epoch": 2.89, "learning_rate": 3.949469334084719e-05, "loss": 72.1578, "step": 3420, "task_loss": 2.3853299617767334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6521768925195169, "compression/movement_sparsity/importance_threshold": -0.00247409990821897, "compression/movement_sparsity/linear_layer_sparsity": 0.6323033976627869, "compression/movement_sparsity/model_sparsity": 0.610581830760297, "compression_loss": 69.79674530029297, "distillation_loss": 2.3629817962646484, "epoch": 2.89, "learning_rate": 3.948999718230488e-05, "loss": 72.0237, "step": 3421, "task_loss": 2.3335561752319336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6525944472257921, "compression/movement_sparsity/importance_threshold": -0.0024711298006031912, "compression/movement_sparsity/linear_layer_sparsity": 0.6327206242883657, "compression/movement_sparsity/model_sparsity": 0.6109847243677481, "compression_loss": 69.84098815917969, "distillation_loss": 4.020607948303223, "epoch": 2.89, "learning_rate": 3.948530102376257e-05, "loss": 72.5666, "step": 3422, "task_loss": 2.2750093936920166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6530116676208715, "compression/movement_sparsity/importance_threshold": -0.0024681620709757655, "compression/movement_sparsity/linear_layer_sparsity": 0.6332040777409941, "compression/movement_sparsity/model_sparsity": 0.6114515697070019, "compression_loss": 69.88518524169922, "distillation_loss": 3.546269416809082, "epoch": 2.89, "learning_rate": 3.948060486522025e-05, "loss": 72.1798, "step": 3423, "task_loss": 1.1587473154067993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6534285538386396, "compression/movement_sparsity/importance_threshold": -0.00246519671838436, "compression/movement_sparsity/linear_layer_sparsity": 0.6337599705120104, "compression/movement_sparsity/model_sparsity": 0.6119883658512076, "compression_loss": 69.92936706542969, "distillation_loss": 3.3433284759521484, "epoch": 2.89, "learning_rate": 3.9475908706677936e-05, "loss": 73.2347, "step": 3424, "task_loss": 1.9090042114257812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.653845106012982, "compression/movement_sparsity/importance_threshold": -0.002462233741876634, "compression/movement_sparsity/linear_layer_sparsity": 0.6342028102496711, "compression/movement_sparsity/model_sparsity": 0.6124159926815452, "compression_loss": 69.97348022460938, "distillation_loss": 2.921900510787964, "epoch": 2.89, "learning_rate": 3.947121254813563e-05, "loss": 72.4734, "step": 3425, "task_loss": 2.296323299407959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6542613242777837, "compression/movement_sparsity/importance_threshold": -0.00245927314050025, "compression/movement_sparsity/linear_layer_sparsity": 0.6348216507016371, "compression/movement_sparsity/model_sparsity": 0.6130135740602104, "compression_loss": 70.01762390136719, "distillation_loss": 2.8255298137664795, "epoch": 2.9, "learning_rate": 3.9466516389593315e-05, "loss": 72.4939, "step": 3426, "task_loss": 1.6284104585647583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6546772087669298, "compression/movement_sparsity/importance_threshold": -0.0024563149133028693, "compression/movement_sparsity/linear_layer_sparsity": 0.6354910377002115, "compression/movement_sparsity/model_sparsity": 0.6136599655561085, "compression_loss": 70.06169891357422, "distillation_loss": 1.7196850776672363, "epoch": 2.9, "learning_rate": 3.9461820231051e-05, "loss": 72.4635, "step": 3427, "task_loss": 0.7680805325508118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6550927596143055, "compression/movement_sparsity/importance_threshold": -0.002453359059332155, "compression/movement_sparsity/linear_layer_sparsity": 0.6359837920035959, "compression/movement_sparsity/model_sparsity": 0.6141357922332821, "compression_loss": 70.10574340820312, "distillation_loss": 2.092924118041992, "epoch": 2.9, "learning_rate": 3.945712407250869e-05, "loss": 72.6954, "step": 3428, "task_loss": 1.129451036453247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6555079769537958, "compression/movement_sparsity/importance_threshold": -0.0024504055776357683, "compression/movement_sparsity/linear_layer_sparsity": 0.6366001641528614, "compression/movement_sparsity/model_sparsity": 0.6147309901030377, "compression_loss": 70.14974212646484, "distillation_loss": 3.4465861320495605, "epoch": 2.9, "learning_rate": 3.945242791396638e-05, "loss": 73.0203, "step": 3429, "task_loss": 2.6109068393707275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6559228609192862, "compression/movement_sparsity/importance_threshold": -0.0024474544672613693, "compression/movement_sparsity/linear_layer_sparsity": 0.637222092964245, "compression/movement_sparsity/model_sparsity": 0.6153315537464735, "compression_loss": 70.19369506835938, "distillation_loss": 2.1734304428100586, "epoch": 2.9, "learning_rate": 3.944773175542407e-05, "loss": 72.9306, "step": 3430, "task_loss": 2.208414316177368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6563374116446614, "compression/movement_sparsity/importance_threshold": -0.0024445057272566234, "compression/movement_sparsity/linear_layer_sparsity": 0.6377487357520506, "compression/movement_sparsity/model_sparsity": 0.6158401047343752, "compression_loss": 70.23760986328125, "distillation_loss": 3.582955837249756, "epoch": 2.9, "learning_rate": 3.944303559688175e-05, "loss": 72.9929, "step": 3431, "task_loss": 1.3441781997680664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6567516292638069, "compression/movement_sparsity/importance_threshold": -0.0024415593566691884, "compression/movement_sparsity/linear_layer_sparsity": 0.6381118743532331, "compression/movement_sparsity/model_sparsity": 0.6161907684074622, "compression_loss": 70.28150177001953, "distillation_loss": 3.2215065956115723, "epoch": 2.9, "learning_rate": 3.943833943833944e-05, "loss": 73.159, "step": 3432, "task_loss": 2.6126832962036133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6571655139106075, "compression/movement_sparsity/importance_threshold": -0.0024386153545467304, "compression/movement_sparsity/linear_layer_sparsity": 0.6385835109557344, "compression/movement_sparsity/model_sparsity": 0.6166462028417437, "compression_loss": 70.32530975341797, "distillation_loss": 1.99333655834198, "epoch": 2.9, "learning_rate": 3.9433643279797126e-05, "loss": 72.6292, "step": 3433, "task_loss": 0.6765602231025696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6575790657189489, "compression/movement_sparsity/importance_threshold": -0.0024356737199369063, "compression/movement_sparsity/linear_layer_sparsity": 0.6391448649955279, "compression/movement_sparsity/model_sparsity": 0.6171882726433433, "compression_loss": 70.36913299560547, "distillation_loss": 2.844606399536133, "epoch": 2.9, "learning_rate": 3.942894712125482e-05, "loss": 72.8603, "step": 3434, "task_loss": 2.0053584575653076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6579922848227153, "compression/movement_sparsity/importance_threshold": -0.002432734451887384, "compression/movement_sparsity/linear_layer_sparsity": 0.6396136397977966, "compression/movement_sparsity/model_sparsity": 0.6176409435890341, "compression_loss": 70.41292572021484, "distillation_loss": 3.823568344116211, "epoch": 2.9, "learning_rate": 3.9424250962712506e-05, "loss": 73.3271, "step": 3435, "task_loss": 2.567326545715332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6584051713557927, "compression/movement_sparsity/importance_threshold": -0.0024297975494458205, "compression/movement_sparsity/linear_layer_sparsity": 0.640193106648229, "compression/movement_sparsity/model_sparsity": 0.6182005039705055, "compression_loss": 70.4566650390625, "distillation_loss": 3.597499370574951, "epoch": 2.9, "learning_rate": 3.941955480417019e-05, "loss": 73.184, "step": 3436, "task_loss": 2.301441192626953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.658817725452066, "compression/movement_sparsity/importance_threshold": -0.0024268630116598792, "compression/movement_sparsity/linear_layer_sparsity": 0.640652866779765, "compression/movement_sparsity/model_sparsity": 0.6186444699271356, "compression_loss": 70.50041198730469, "distillation_loss": 1.5465266704559326, "epoch": 2.9, "learning_rate": 3.941485864562788e-05, "loss": 72.5083, "step": 3437, "task_loss": 1.1424626111984253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6592299472454202, "compression/movement_sparsity/importance_threshold": -0.0024239308375772223, "compression/movement_sparsity/linear_layer_sparsity": 0.6410778202659719, "compression/movement_sparsity/model_sparsity": 0.6190548249537815, "compression_loss": 70.5440673828125, "distillation_loss": 2.3814616203308105, "epoch": 2.91, "learning_rate": 3.9410162487085565e-05, "loss": 73.1352, "step": 3438, "task_loss": 1.7009268999099731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6596418368697405, "compression/movement_sparsity/importance_threshold": -0.002421001026245511, "compression/movement_sparsity/linear_layer_sparsity": 0.6414855552541125, "compression/movement_sparsity/model_sparsity": 0.6194485529907401, "compression_loss": 70.58775329589844, "distillation_loss": 2.1733169555664062, "epoch": 2.91, "learning_rate": 3.940546632854326e-05, "loss": 72.4818, "step": 3439, "task_loss": 3.8417162895202637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.660053394458912, "compression/movement_sparsity/importance_threshold": -0.0024180735767124085, "compression/movement_sparsity/linear_layer_sparsity": 0.6419679832283244, "compression/movement_sparsity/model_sparsity": 0.6199144080799156, "compression_loss": 70.63134002685547, "distillation_loss": 3.4408681392669678, "epoch": 2.91, "learning_rate": 3.940077017000094e-05, "loss": 73.7244, "step": 3440, "task_loss": 2.181762456893921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6604646201468201, "compression/movement_sparsity/importance_threshold": -0.0024151484880255746, "compression/movement_sparsity/linear_layer_sparsity": 0.6423945703254973, "compression/movement_sparsity/model_sparsity": 0.6203263405979653, "compression_loss": 70.67496490478516, "distillation_loss": 3.270601987838745, "epoch": 2.91, "learning_rate": 3.939607401145863e-05, "loss": 73.6662, "step": 3441, "task_loss": 1.8095155954360962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6608755140673495, "compression/movement_sparsity/importance_threshold": -0.0024122257592326737, "compression/movement_sparsity/linear_layer_sparsity": 0.6428777137497672, "compression/movement_sparsity/model_sparsity": 0.6207928865592884, "compression_loss": 70.71849822998047, "distillation_loss": 1.5864430665969849, "epoch": 2.91, "learning_rate": 3.939137785291632e-05, "loss": 73.0387, "step": 3442, "task_loss": 0.5164861083030701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.661286076354386, "compression/movement_sparsity/importance_threshold": -0.0024093053893813627, "compression/movement_sparsity/linear_layer_sparsity": 0.6433642555618133, "compression/movement_sparsity/model_sparsity": 0.6212627141633131, "compression_loss": 70.76199340820312, "distillation_loss": 3.2117581367492676, "epoch": 2.91, "learning_rate": 3.9386681694374e-05, "loss": 73.8259, "step": 3443, "task_loss": 1.9874181747436523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6616963071418138, "compression/movement_sparsity/importance_threshold": -0.0024063873775193104, "compression/movement_sparsity/linear_layer_sparsity": 0.6438055928543519, "compression/movement_sparsity/model_sparsity": 0.6216888901621407, "compression_loss": 70.80549621582031, "distillation_loss": 2.873431921005249, "epoch": 2.91, "learning_rate": 3.938198553583169e-05, "loss": 73.5065, "step": 3444, "task_loss": 1.7315844297409058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.662106206563519, "compression/movement_sparsity/importance_threshold": -0.0024034717226941726, "compression/movement_sparsity/linear_layer_sparsity": 0.6441991500071735, "compression/movement_sparsity/model_sparsity": 0.6220689274160395, "compression_loss": 70.84894561767578, "distillation_loss": 2.020537853240967, "epoch": 2.91, "learning_rate": 3.9377289377289376e-05, "loss": 73.831, "step": 3445, "task_loss": 2.001110315322876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.662515774753386, "compression/movement_sparsity/importance_threshold": -0.0024005584239536167, "compression/movement_sparsity/linear_layer_sparsity": 0.6447511912720435, "compression/movement_sparsity/model_sparsity": 0.6226020043651835, "compression_loss": 70.89240264892578, "distillation_loss": 2.234612226486206, "epoch": 2.91, "learning_rate": 3.937259321874707e-05, "loss": 73.3153, "step": 3446, "task_loss": 1.1659191846847534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6629250118453004, "compression/movement_sparsity/importance_threshold": -0.0023976474803453, "compression/movement_sparsity/linear_layer_sparsity": 0.6452179508899817, "compression/movement_sparsity/model_sparsity": 0.6230527293543251, "compression_loss": 70.9358139038086, "distillation_loss": 3.4513649940490723, "epoch": 2.91, "learning_rate": 3.9367897060204755e-05, "loss": 73.4648, "step": 3447, "task_loss": 2.1113128662109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6633339179731472, "compression/movement_sparsity/importance_threshold": -0.002394738890916886, "compression/movement_sparsity/linear_layer_sparsity": 0.6456839950578618, "compression/movement_sparsity/model_sparsity": 0.6235027634713188, "compression_loss": 70.97920227050781, "distillation_loss": 3.0909903049468994, "epoch": 2.91, "learning_rate": 3.936320090166244e-05, "loss": 73.7078, "step": 3448, "task_loss": 1.6161836385726929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6637424932708114, "compression/movement_sparsity/importance_threshold": -0.0023918326547160366, "compression/movement_sparsity/linear_layer_sparsity": 0.6462463626519044, "compression/movement_sparsity/model_sparsity": 0.624045812008461, "compression_loss": 71.02249145507812, "distillation_loss": 5.888167858123779, "epoch": 2.91, "learning_rate": 3.935850474312013e-05, "loss": 74.7655, "step": 3449, "task_loss": 2.96809458732605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6641507378721785, "compression/movement_sparsity/importance_threshold": -0.0023889287707904126, "compression/movement_sparsity/linear_layer_sparsity": 0.6468394826742798, "compression/movement_sparsity/model_sparsity": 0.6246185565334172, "compression_loss": 71.06578826904297, "distillation_loss": 2.997404098510742, "epoch": 2.92, "learning_rate": 3.9353808584577814e-05, "loss": 73.4194, "step": 3450, "task_loss": 1.1337159872055054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6645586519111331, "compression/movement_sparsity/importance_threshold": -0.0023860272381876794, "compression/movement_sparsity/linear_layer_sparsity": 0.6474698537963497, "compression/movement_sparsity/model_sparsity": 0.6252272724681956, "compression_loss": 71.10909271240234, "distillation_loss": 3.1485161781311035, "epoch": 2.92, "learning_rate": 3.934911242603551e-05, "loss": 74.1581, "step": 3451, "task_loss": 1.9535713195800781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6649662355215611, "compression/movement_sparsity/importance_threshold": -0.002383128055955492, "compression/movement_sparsity/linear_layer_sparsity": 0.6481598696049342, "compression/movement_sparsity/model_sparsity": 0.6258935841110184, "compression_loss": 71.15226745605469, "distillation_loss": 2.062156915664673, "epoch": 2.92, "learning_rate": 3.9344416267493194e-05, "loss": 73.9594, "step": 3452, "task_loss": 1.3615081310272217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6653734888373467, "compression/movement_sparsity/importance_threshold": -0.00238023122314152, "compression/movement_sparsity/linear_layer_sparsity": 0.648711028481399, "compression/movement_sparsity/model_sparsity": 0.6264258089845136, "compression_loss": 71.1954345703125, "distillation_loss": 1.6314300298690796, "epoch": 2.92, "learning_rate": 3.933972010895088e-05, "loss": 74.0924, "step": 3453, "task_loss": 1.21321702003479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6657804119923759, "compression/movement_sparsity/importance_threshold": -0.0023773367387934195, "compression/movement_sparsity/linear_layer_sparsity": 0.6491508633288156, "compression/movement_sparsity/model_sparsity": 0.626850534151831, "compression_loss": 71.23858642578125, "distillation_loss": 3.350534677505493, "epoch": 2.92, "learning_rate": 3.9335023950408567e-05, "loss": 74.5487, "step": 3454, "task_loss": 2.208475351333618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6661870051205332, "compression/movement_sparsity/importance_threshold": -0.0023744446019588568, "compression/movement_sparsity/linear_layer_sparsity": 0.6496471471858202, "compression/movement_sparsity/model_sparsity": 0.6273297691315999, "compression_loss": 71.28167724609375, "distillation_loss": 2.6933157444000244, "epoch": 2.92, "learning_rate": 3.933032779186625e-05, "loss": 74.671, "step": 3455, "task_loss": 2.544951915740967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6665932683557042, "compression/movement_sparsity/importance_threshold": -0.00237155481168549, "compression/movement_sparsity/linear_layer_sparsity": 0.6501502039700419, "compression/movement_sparsity/model_sparsity": 0.6278155443676999, "compression_loss": 71.32473754882812, "distillation_loss": 2.5802605152130127, "epoch": 2.92, "learning_rate": 3.9325631633323946e-05, "loss": 73.9922, "step": 3456, "task_loss": 2.2229390144348145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6669992018317739, "compression/movement_sparsity/importance_threshold": -0.0023686673670209827, "compression/movement_sparsity/linear_layer_sparsity": 0.6506774071937264, "compression/movement_sparsity/model_sparsity": 0.628324636538784, "compression_loss": 71.3677978515625, "distillation_loss": 3.4706969261169434, "epoch": 2.92, "learning_rate": 3.9320935474781626e-05, "loss": 74.231, "step": 3457, "task_loss": 1.6241064071655273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6674048056826272, "compression/movement_sparsity/importance_threshold": -0.002365782267012997, "compression/movement_sparsity/linear_layer_sparsity": 0.6511060452477327, "compression/movement_sparsity/model_sparsity": 0.6287385495569904, "compression_loss": 71.41079711914062, "distillation_loss": 2.88515043258667, "epoch": 2.92, "learning_rate": 3.931623931623932e-05, "loss": 73.8816, "step": 3458, "task_loss": 2.231069326400757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6678100800421496, "compression/movement_sparsity/importance_threshold": -0.0023628995107091936, "compression/movement_sparsity/linear_layer_sparsity": 0.6515866488242962, "compression/movement_sparsity/model_sparsity": 0.6292026429221893, "compression_loss": 71.4537582397461, "distillation_loss": 2.3767266273498535, "epoch": 2.92, "learning_rate": 3.9311543157697005e-05, "loss": 74.0626, "step": 3459, "task_loss": 1.4785529375076294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6682150250442258, "compression/movement_sparsity/importance_threshold": -0.0023600190971572368, "compression/movement_sparsity/linear_layer_sparsity": 0.6518686553888843, "compression/movement_sparsity/model_sparsity": 0.6294749616937302, "compression_loss": 71.49667358398438, "distillation_loss": 2.569472074508667, "epoch": 2.92, "learning_rate": 3.93068469991547e-05, "loss": 73.898, "step": 3460, "task_loss": 2.510350227355957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6686196408227416, "compression/movement_sparsity/importance_threshold": -0.0023571410254047843, "compression/movement_sparsity/linear_layer_sparsity": 0.6524335747548009, "compression/movement_sparsity/model_sparsity": 0.6300204743415323, "compression_loss": 71.53956604003906, "distillation_loss": 2.088343620300293, "epoch": 2.93, "learning_rate": 3.930215084061238e-05, "loss": 74.2496, "step": 3461, "task_loss": 1.9547892808914185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6690239275115815, "compression/movement_sparsity/importance_threshold": -0.0023542652944995024, "compression/movement_sparsity/linear_layer_sparsity": 0.6528680556509489, "compression/movement_sparsity/model_sparsity": 0.6304400294822781, "compression_loss": 71.58240509033203, "distillation_loss": 1.539374828338623, "epoch": 2.93, "learning_rate": 3.9297454682070064e-05, "loss": 74.2342, "step": 3462, "task_loss": 1.4872456789016724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6694278852446311, "compression/movement_sparsity/importance_threshold": -0.002351391903489048, "compression/movement_sparsity/linear_layer_sparsity": 0.6532818481162486, "compression/movement_sparsity/model_sparsity": 0.6308396069034202, "compression_loss": 71.62522888183594, "distillation_loss": 2.1612014770507812, "epoch": 2.93, "learning_rate": 3.929275852352776e-05, "loss": 74.3921, "step": 3463, "task_loss": 0.8299813866615295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6698315141557751, "compression/movement_sparsity/importance_threshold": -0.002348520851421089, "compression/movement_sparsity/linear_layer_sparsity": 0.653846481302142, "compression/movement_sparsity/model_sparsity": 0.6313848432023633, "compression_loss": 71.6679916381836, "distillation_loss": 1.6936122179031372, "epoch": 2.93, "learning_rate": 3.9288062364985443e-05, "loss": 73.7498, "step": 3464, "task_loss": 1.5178842544555664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6702348143788992, "compression/movement_sparsity/importance_threshold": -0.002345652137343282, "compression/movement_sparsity/linear_layer_sparsity": 0.6544416284330156, "compression/movement_sparsity/model_sparsity": 0.6319595451984046, "compression_loss": 71.71072387695312, "distillation_loss": 2.8177216053009033, "epoch": 2.93, "learning_rate": 3.9283366206443137e-05, "loss": 74.174, "step": 3465, "task_loss": 1.8447030782699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6706377860478878, "compression/movement_sparsity/importance_threshold": -0.0023427857603032926, "compression/movement_sparsity/linear_layer_sparsity": 0.6548749288365675, "compression/movement_sparsity/model_sparsity": 0.6323779604001066, "compression_loss": 71.7534408569336, "distillation_loss": 2.7277984619140625, "epoch": 2.93, "learning_rate": 3.9278670047900816e-05, "loss": 74.5242, "step": 3466, "task_loss": 1.4381777048110962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6710404292966268, "compression/movement_sparsity/importance_threshold": -0.00233992171934878, "compression/movement_sparsity/linear_layer_sparsity": 0.6553685297558541, "compression/movement_sparsity/model_sparsity": 0.6328546046093216, "compression_loss": 71.79607391357422, "distillation_loss": 2.7231903076171875, "epoch": 2.93, "learning_rate": 3.927397388935851e-05, "loss": 75.0845, "step": 3467, "task_loss": 1.637817144393921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.671442744259001, "compression/movement_sparsity/importance_threshold": -0.002337060013527407, "compression/movement_sparsity/linear_layer_sparsity": 0.6559558665569262, "compression/movement_sparsity/model_sparsity": 0.6334217645844176, "compression_loss": 71.83865356445312, "distillation_loss": 2.4907736778259277, "epoch": 2.93, "learning_rate": 3.9269277730816196e-05, "loss": 74.2513, "step": 3468, "task_loss": 1.3326846361160278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6718447310688952, "compression/movement_sparsity/importance_threshold": -0.002334200641886837, "compression/movement_sparsity/linear_layer_sparsity": 0.6564735900673401, "compression/movement_sparsity/model_sparsity": 0.633921702699545, "compression_loss": 71.88130187988281, "distillation_loss": 2.6234970092773438, "epoch": 2.93, "learning_rate": 3.926458157227388e-05, "loss": 74.3174, "step": 3469, "task_loss": 1.7897729873657227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6722463898601954, "compression/movement_sparsity/importance_threshold": -0.0023313436034747273, "compression/movement_sparsity/linear_layer_sparsity": 0.6571025064409584, "compression/movement_sparsity/model_sparsity": 0.6345290138609564, "compression_loss": 71.92387390136719, "distillation_loss": 3.249788284301758, "epoch": 2.93, "learning_rate": 3.925988541373157e-05, "loss": 75.3668, "step": 3470, "task_loss": 1.9441148042678833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6726477207667857, "compression/movement_sparsity/importance_threshold": -0.002328488897338746, "compression/movement_sparsity/linear_layer_sparsity": 0.6575494719406211, "compression/movement_sparsity/model_sparsity": 0.6349606247206789, "compression_loss": 71.9664306640625, "distillation_loss": 2.7604620456695557, "epoch": 2.93, "learning_rate": 3.9255189255189255e-05, "loss": 74.7832, "step": 3471, "task_loss": 2.106065511703491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6730487239225522, "compression/movement_sparsity/importance_threshold": -0.002325636522526549, "compression/movement_sparsity/linear_layer_sparsity": 0.6579700731056409, "compression/movement_sparsity/model_sparsity": 0.6353667769417598, "compression_loss": 72.00889587402344, "distillation_loss": 3.091581344604492, "epoch": 2.93, "learning_rate": 3.925049309664695e-05, "loss": 75.146, "step": 3472, "task_loss": 2.580735683441162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6734493994613793, "compression/movement_sparsity/importance_threshold": -0.002322786478085803, "compression/movement_sparsity/linear_layer_sparsity": 0.6584489238295619, "compression/movement_sparsity/model_sparsity": 0.635829177670197, "compression_loss": 72.05140686035156, "distillation_loss": 2.0690853595733643, "epoch": 2.94, "learning_rate": 3.9245796938104634e-05, "loss": 74.5389, "step": 3473, "task_loss": 1.3855717182159424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6738497475171527, "compression/movement_sparsity/importance_threshold": -0.0023199387630641662, "compression/movement_sparsity/linear_layer_sparsity": 0.6588963901442653, "compression/movement_sparsity/model_sparsity": 0.6362612721404229, "compression_loss": 72.09387969970703, "distillation_loss": 3.0626018047332764, "epoch": 2.94, "learning_rate": 3.924110077956232e-05, "loss": 75.2538, "step": 3474, "task_loss": 1.5858852863311768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6742497682237569, "compression/movement_sparsity/importance_threshold": -0.002317093376509304, "compression/movement_sparsity/linear_layer_sparsity": 0.6594506612284831, "compression/movement_sparsity/model_sparsity": 0.6367965023077605, "compression_loss": 72.13627624511719, "distillation_loss": 2.4842472076416016, "epoch": 2.94, "learning_rate": 3.923640462102001e-05, "loss": 74.9107, "step": 3475, "task_loss": 1.5240068435668945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6746494617150778, "compression/movement_sparsity/importance_threshold": -0.0023142503174688753, "compression/movement_sparsity/linear_layer_sparsity": 0.6599232875368981, "compression/movement_sparsity/model_sparsity": 0.6372528924485129, "compression_loss": 72.17864227294922, "distillation_loss": 4.951432228088379, "epoch": 2.94, "learning_rate": 3.923170846247769e-05, "loss": 75.1705, "step": 3476, "task_loss": 2.379776954650879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.675048828125, "compression/movement_sparsity/importance_threshold": -0.0023114095849905425, "compression/movement_sparsity/linear_layer_sparsity": 0.6605118048305662, "compression/movement_sparsity/model_sparsity": 0.6378211923626526, "compression_loss": 72.22097778320312, "distillation_loss": 2.282902479171753, "epoch": 2.94, "learning_rate": 3.9227012303935386e-05, "loss": 74.5277, "step": 3477, "task_loss": 1.3771378993988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6754478675874089, "compression/movement_sparsity/importance_threshold": -0.0023085711781219674, "compression/movement_sparsity/linear_layer_sparsity": 0.6609378195676926, "compression/movement_sparsity/model_sparsity": 0.6382325721829841, "compression_loss": 72.26329040527344, "distillation_loss": 2.6364614963531494, "epoch": 2.94, "learning_rate": 3.9222316145393066e-05, "loss": 74.8927, "step": 3478, "task_loss": 1.7112637758255005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6758465802361895, "compression/movement_sparsity/importance_threshold": -0.0023057350959108137, "compression/movement_sparsity/linear_layer_sparsity": 0.661525311382944, "compression/movement_sparsity/model_sparsity": 0.6387998818470453, "compression_loss": 72.30551147460938, "distillation_loss": 3.58988618850708, "epoch": 2.94, "learning_rate": 3.921761998685076e-05, "loss": 75.2566, "step": 3479, "task_loss": 2.2622666358947754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6762449662052268, "compression/movement_sparsity/importance_threshold": -0.0023029013374047426, "compression/movement_sparsity/linear_layer_sparsity": 0.6620226207183653, "compression/movement_sparsity/model_sparsity": 0.6392801070768924, "compression_loss": 72.34774017333984, "distillation_loss": 3.058664083480835, "epoch": 2.94, "learning_rate": 3.9212923828308445e-05, "loss": 74.8477, "step": 3480, "task_loss": 2.1178576946258545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6766430256284065, "compression/movement_sparsity/importance_threshold": -0.0023000699016514125, "compression/movement_sparsity/linear_layer_sparsity": 0.6625188568786994, "compression/movement_sparsity/model_sparsity": 0.639759295998518, "compression_loss": 72.3899154663086, "distillation_loss": 3.588595390319824, "epoch": 2.94, "learning_rate": 3.920822766976613e-05, "loss": 75.1303, "step": 3481, "task_loss": 1.9834868907928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.677040758639613, "compression/movement_sparsity/importance_threshold": -0.0022972407876984905, "compression/movement_sparsity/linear_layer_sparsity": 0.6628389730830517, "compression/movement_sparsity/model_sparsity": 0.6400684152264583, "compression_loss": 72.43203735351562, "distillation_loss": 2.891233444213867, "epoch": 2.94, "learning_rate": 3.9203531511223825e-05, "loss": 75.3998, "step": 3482, "task_loss": 2.0785725116729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6774381653727322, "compression/movement_sparsity/importance_threshold": -0.0022944139945936335, "compression/movement_sparsity/linear_layer_sparsity": 0.6633386553278325, "compression/movement_sparsity/model_sparsity": 0.6405509318489285, "compression_loss": 72.47413635253906, "distillation_loss": 2.6242542266845703, "epoch": 2.94, "learning_rate": 3.9198835352681504e-05, "loss": 75.1873, "step": 3483, "task_loss": 1.1736419200897217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6778352459616486, "compression/movement_sparsity/importance_threshold": -0.0022915895213845077, "compression/movement_sparsity/linear_layer_sparsity": 0.6637439577857756, "compression/movement_sparsity/model_sparsity": 0.640942310920585, "compression_loss": 72.51622009277344, "distillation_loss": 3.848700761795044, "epoch": 2.94, "learning_rate": 3.91941391941392e-05, "loss": 75.084, "step": 3484, "task_loss": 2.2168338298797607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6782320005402478, "compression/movement_sparsity/importance_threshold": -0.002288767367118771, "compression/movement_sparsity/linear_layer_sparsity": 0.6642178361317924, "compression/movement_sparsity/model_sparsity": 0.6413999100875958, "compression_loss": 72.55821990966797, "distillation_loss": 2.8778038024902344, "epoch": 2.95, "learning_rate": 3.9189443035596884e-05, "loss": 75.2029, "step": 3485, "task_loss": 2.1395139694213867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6786284292424143, "compression/movement_sparsity/importance_threshold": -0.002285947530844091, "compression/movement_sparsity/linear_layer_sparsity": 0.6645823460122531, "compression/movement_sparsity/model_sparsity": 0.6417518979322991, "compression_loss": 72.60025787353516, "distillation_loss": 2.8405869007110596, "epoch": 2.95, "learning_rate": 3.918474687705457e-05, "loss": 75.3112, "step": 3486, "task_loss": 1.6518694162368774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6790245322020341, "compression/movement_sparsity/importance_threshold": -0.002283130011608122, "compression/movement_sparsity/linear_layer_sparsity": 0.6650492964168735, "compression/movement_sparsity/model_sparsity": 0.6422028071540133, "compression_loss": 72.6422348022461, "distillation_loss": 2.517374038696289, "epoch": 2.95, "learning_rate": 3.9180050718512256e-05, "loss": 75.0126, "step": 3487, "task_loss": 1.882001280784607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6794203095529917, "compression/movement_sparsity/importance_threshold": -0.0022803148084585313, "compression/movement_sparsity/linear_layer_sparsity": 0.6655313189693856, "compression/movement_sparsity/model_sparsity": 0.6426682707489718, "compression_loss": 72.6842041015625, "distillation_loss": 2.669520616531372, "epoch": 2.95, "learning_rate": 3.917535455996994e-05, "loss": 75.2242, "step": 3488, "task_loss": 2.1473991870880127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6798157614291724, "compression/movement_sparsity/importance_threshold": -0.00227750192044298, "compression/movement_sparsity/linear_layer_sparsity": 0.666010384328324, "compression/movement_sparsity/model_sparsity": 0.6431308787390533, "compression_loss": 72.72604370117188, "distillation_loss": 4.262603759765625, "epoch": 2.95, "learning_rate": 3.9170658401427636e-05, "loss": 76.0201, "step": 3489, "task_loss": 2.5081136226654053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6802108879644617, "compression/movement_sparsity/importance_threshold": -0.0022746913466091274, "compression/movement_sparsity/linear_layer_sparsity": 0.6665092318813703, "compression/movement_sparsity/model_sparsity": 0.6436125893440179, "compression_loss": 72.76790618896484, "distillation_loss": 3.7236382961273193, "epoch": 2.95, "learning_rate": 3.916596224288532e-05, "loss": 76.0841, "step": 3490, "task_loss": 1.9795567989349365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6806056892927441, "compression/movement_sparsity/importance_threshold": -0.002271883086004639, "compression/movement_sparsity/linear_layer_sparsity": 0.6670161520959061, "compression/movement_sparsity/model_sparsity": 0.6441020952897154, "compression_loss": 72.80977630615234, "distillation_loss": 2.6771953105926514, "epoch": 2.95, "learning_rate": 3.916126608434301e-05, "loss": 75.519, "step": 3491, "task_loss": 1.7552495002746582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6810001655479054, "compression/movement_sparsity/importance_threshold": -0.0022690771376771713, "compression/movement_sparsity/linear_layer_sparsity": 0.6674763891941475, "compression/movement_sparsity/model_sparsity": 0.6445465218277774, "compression_loss": 72.85160827636719, "distillation_loss": 4.405814170837402, "epoch": 2.95, "learning_rate": 3.9156569925800695e-05, "loss": 76.3604, "step": 3492, "task_loss": 1.3829325437545776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6813943168638301, "compression/movement_sparsity/importance_threshold": -0.0022662735006743935, "compression/movement_sparsity/linear_layer_sparsity": 0.6678874391008911, "compression/movement_sparsity/model_sparsity": 0.6449434509056867, "compression_loss": 72.89340209960938, "distillation_loss": 3.0415916442871094, "epoch": 2.95, "learning_rate": 3.915187376725839e-05, "loss": 75.6401, "step": 3493, "task_loss": 1.583380103111267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.681788143374404, "compression/movement_sparsity/importance_threshold": -0.0022634721740439605, "compression/movement_sparsity/linear_layer_sparsity": 0.6684545167411498, "compression/movement_sparsity/model_sparsity": 0.6454910476844677, "compression_loss": 72.93511962890625, "distillation_loss": 3.0690670013427734, "epoch": 2.95, "learning_rate": 3.9147177608716074e-05, "loss": 75.7245, "step": 3494, "task_loss": 2.789395332336426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6821816452135114, "compression/movement_sparsity/importance_threshold": -0.0022606731568335404, "compression/movement_sparsity/linear_layer_sparsity": 0.6689241620076559, "compression/movement_sparsity/model_sparsity": 0.6459445591912715, "compression_loss": 72.97682189941406, "distillation_loss": 2.438047409057617, "epoch": 2.95, "learning_rate": 3.914248145017376e-05, "loss": 75.5048, "step": 3495, "task_loss": 1.2682114839553833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6825748225150383, "compression/movement_sparsity/importance_threshold": -0.002257876448090789, "compression/movement_sparsity/linear_layer_sparsity": 0.6694099049004704, "compression/movement_sparsity/model_sparsity": 0.646413615321398, "compression_loss": 73.01849365234375, "distillation_loss": 4.530074119567871, "epoch": 2.95, "learning_rate": 3.913778529163145e-05, "loss": 75.8596, "step": 3496, "task_loss": 2.2829620838165283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6829676754128695, "compression/movement_sparsity/importance_threshold": -0.002255082046863371, "compression/movement_sparsity/linear_layer_sparsity": 0.6699488415051085, "compression/movement_sparsity/model_sparsity": 0.6469340377957038, "compression_loss": 73.06009674072266, "distillation_loss": 2.77852725982666, "epoch": 2.96, "learning_rate": 3.913308913308913e-05, "loss": 75.6513, "step": 3497, "task_loss": 1.9271581172943115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.68336020404089, "compression/movement_sparsity/importance_threshold": -0.0022522899521989492, "compression/movement_sparsity/linear_layer_sparsity": 0.6703567672799314, "compression/movement_sparsity/model_sparsity": 0.647327950065235, "compression_loss": 73.10172271728516, "distillation_loss": 1.8541961908340454, "epoch": 2.96, "learning_rate": 3.9128392974546826e-05, "loss": 75.8291, "step": 3498, "task_loss": 2.081282615661621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6837524085329851, "compression/movement_sparsity/importance_threshold": -0.0022495001631451846, "compression/movement_sparsity/linear_layer_sparsity": 0.6707800871551721, "compression/movement_sparsity/model_sparsity": 0.6477367276004771, "compression_loss": 73.14324188232422, "distillation_loss": 2.247497081756592, "epoch": 2.96, "learning_rate": 3.912369681600451e-05, "loss": 75.4658, "step": 3499, "task_loss": 2.162454605102539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6841442890230398, "compression/movement_sparsity/importance_threshold": -0.0022467126787497392, "compression/movement_sparsity/linear_layer_sparsity": 0.6712956643154115, "compression/movement_sparsity/model_sparsity": 0.6482345930991614, "compression_loss": 73.18476867675781, "distillation_loss": 2.79879093170166, "epoch": 2.96, "learning_rate": 3.91190006574622e-05, "loss": 75.1831, "step": 3500, "task_loss": 1.47402024269104 }, { "epoch": 2.96, "eval_accuracy": 0.7183366336633663, "eval_loss": 75.31998443603516, "eval_runtime": 226.861, "eval_samples_per_second": 111.302, "eval_steps_per_second": 0.873, "step": 3500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6845358456449396, "compression/movement_sparsity/importance_threshold": -0.0022439274980602733, "compression/movement_sparsity/linear_layer_sparsity": 0.6718541327066372, "compression/movement_sparsity/model_sparsity": 0.6487738763830987, "compression_loss": 73.22624969482422, "distillation_loss": 2.535590648651123, "epoch": 2.96, "learning_rate": 3.9114304498919885e-05, "loss": 75.8093, "step": 3501, "task_loss": 1.5770155191421509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6849270785325692, "compression/movement_sparsity/importance_threshold": -0.0022411446201244514, "compression/movement_sparsity/linear_layer_sparsity": 0.6722115954040251, "compression/movement_sparsity/model_sparsity": 0.6491190591371474, "compression_loss": 73.26763153076172, "distillation_loss": 3.4821572303771973, "epoch": 2.96, "learning_rate": 3.910960834037757e-05, "loss": 76.123, "step": 3502, "task_loss": 2.48134708404541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6853179878198141, "compression/movement_sparsity/importance_threshold": -0.002238364043989931, "compression/movement_sparsity/linear_layer_sparsity": 0.6727005578221014, "compression/movement_sparsity/model_sparsity": 0.6495912241919384, "compression_loss": 73.30900573730469, "distillation_loss": 2.9437808990478516, "epoch": 2.96, "learning_rate": 3.9104912181835265e-05, "loss": 75.4127, "step": 3503, "task_loss": 0.9635573625564575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6857085736405589, "compression/movement_sparsity/importance_threshold": -0.0022355857687043814, "compression/movement_sparsity/linear_layer_sparsity": 0.6731941468172202, "compression/movement_sparsity/model_sparsity": 0.6500678568866176, "compression_loss": 73.35037231445312, "distillation_loss": 2.957620620727539, "epoch": 2.96, "learning_rate": 3.9100216023292944e-05, "loss": 76.1783, "step": 3504, "task_loss": 2.1698246002197266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6860988361286895, "compression/movement_sparsity/importance_threshold": -0.0022328097933154564, "compression/movement_sparsity/linear_layer_sparsity": 0.673578009621754, "compression/movement_sparsity/model_sparsity": 0.6504385328229154, "compression_loss": 73.39167022705078, "distillation_loss": 3.4697365760803223, "epoch": 2.96, "learning_rate": 3.909551986475064e-05, "loss": 76.2657, "step": 3505, "task_loss": 2.080948829650879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6864887754180904, "compression/movement_sparsity/importance_threshold": -0.0022300361168708233, "compression/movement_sparsity/linear_layer_sparsity": 0.674048573049168, "compression/movement_sparsity/model_sparsity": 0.6508929309489754, "compression_loss": 73.432861328125, "distillation_loss": 3.099545955657959, "epoch": 2.96, "learning_rate": 3.9090823706208324e-05, "loss": 76.7096, "step": 3506, "task_loss": 2.3421928882598877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6868783916426471, "compression/movement_sparsity/importance_threshold": -0.0022272647384181414, "compression/movement_sparsity/linear_layer_sparsity": 0.6745603821724345, "compression/movement_sparsity/model_sparsity": 0.6513871578543488, "compression_loss": 73.47409057617188, "distillation_loss": 2.69588041305542, "epoch": 2.96, "learning_rate": 3.908612754766601e-05, "loss": 76.051, "step": 3507, "task_loss": 2.2204692363739014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6872676849362445, "compression/movement_sparsity/importance_threshold": -0.0022244956570050736, "compression/movement_sparsity/linear_layer_sparsity": 0.6751378338385364, "compression/movement_sparsity/model_sparsity": 0.651944772279271, "compression_loss": 73.5152587890625, "distillation_loss": 3.772573232650757, "epoch": 2.96, "learning_rate": 3.90814313891237e-05, "loss": 76.4884, "step": 3508, "task_loss": 2.2748515605926514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6876566554327679, "compression/movement_sparsity/importance_threshold": -0.0022217288716792818, "compression/movement_sparsity/linear_layer_sparsity": 0.6755651721582705, "compression/movement_sparsity/model_sparsity": 0.6523574302130757, "compression_loss": 73.55633544921875, "distillation_loss": 3.123565196990967, "epoch": 2.97, "learning_rate": 3.907673523058138e-05, "loss": 76.3603, "step": 3509, "task_loss": 2.7183382511138916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6880453032661026, "compression/movement_sparsity/importance_threshold": -0.0022189643814884254, "compression/movement_sparsity/linear_layer_sparsity": 0.676042663527081, "compression/movement_sparsity/model_sparsity": 0.6528185182844323, "compression_loss": 73.59748840332031, "distillation_loss": 4.2341814041137695, "epoch": 2.97, "learning_rate": 3.9072039072039076e-05, "loss": 76.4407, "step": 3510, "task_loss": 3.533322334289551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6884336285701332, "compression/movement_sparsity/importance_threshold": -0.0022162021854801707, "compression/movement_sparsity/linear_layer_sparsity": 0.6764635031754536, "compression/movement_sparsity/model_sparsity": 0.653224900796229, "compression_loss": 73.6385498046875, "distillation_loss": 2.0789954662323, "epoch": 2.97, "learning_rate": 3.906734291349676e-05, "loss": 76.1331, "step": 3511, "task_loss": 2.5840728282928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6888216314787456, "compression/movement_sparsity/importance_threshold": -0.0022134422827021745, "compression/movement_sparsity/linear_layer_sparsity": 0.6769148090721356, "compression/movement_sparsity/model_sparsity": 0.6536607029469808, "compression_loss": 73.67948150634766, "distillation_loss": 1.730795979499817, "epoch": 2.97, "learning_rate": 3.906264675495445e-05, "loss": 76.0926, "step": 3512, "task_loss": 0.979154109954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6892093121258241, "compression/movement_sparsity/importance_threshold": -0.002210684672202104, "compression/movement_sparsity/linear_layer_sparsity": 0.6774520762933047, "compression/movement_sparsity/model_sparsity": 0.6541795133862754, "compression_loss": 73.7204360961914, "distillation_loss": 4.301840782165527, "epoch": 2.97, "learning_rate": 3.9057950596412135e-05, "loss": 76.896, "step": 3513, "task_loss": 2.772507429122925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6895966706452548, "compression/movement_sparsity/importance_threshold": -0.002207929353027616, "compression/movement_sparsity/linear_layer_sparsity": 0.6780163398300014, "compression/movement_sparsity/model_sparsity": 0.6547243927346089, "compression_loss": 73.76136779785156, "distillation_loss": 3.27933931350708, "epoch": 2.97, "learning_rate": 3.905325443786982e-05, "loss": 76.4585, "step": 3514, "task_loss": 1.945916771888733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6899837071709218, "compression/movement_sparsity/importance_threshold": -0.0022051763242263763, "compression/movement_sparsity/linear_layer_sparsity": 0.6786542589501848, "compression/movement_sparsity/model_sparsity": 0.6553403973705452, "compression_loss": 73.80224609375, "distillation_loss": 1.6093475818634033, "epoch": 2.97, "learning_rate": 3.9048558279327515e-05, "loss": 76.5109, "step": 3515, "task_loss": 1.508538007736206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.690370421836711, "compression/movement_sparsity/importance_threshold": -0.002202425584846045, "compression/movement_sparsity/linear_layer_sparsity": 0.6791459400784821, "compression/movement_sparsity/model_sparsity": 0.6558151877394973, "compression_loss": 73.84312438964844, "distillation_loss": 4.23435115814209, "epoch": 2.97, "learning_rate": 3.90438621207852e-05, "loss": 76.4749, "step": 3516, "task_loss": 1.989656686782837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6907568147765073, "compression/movement_sparsity/importance_threshold": -0.0021996771339342835, "compression/movement_sparsity/linear_layer_sparsity": 0.6796678131992332, "compression/movement_sparsity/model_sparsity": 0.6563191329130812, "compression_loss": 73.88398742675781, "distillation_loss": 3.049501657485962, "epoch": 2.97, "learning_rate": 3.903916596224289e-05, "loss": 76.8429, "step": 3517, "task_loss": 1.5933555364608765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6911428861241957, "compression/movement_sparsity/importance_threshold": -0.002196930970538755, "compression/movement_sparsity/linear_layer_sparsity": 0.6801530433528393, "compression/movement_sparsity/model_sparsity": 0.6567876939181684, "compression_loss": 73.92486572265625, "distillation_loss": 2.55890154838562, "epoch": 2.97, "learning_rate": 3.9034469803700574e-05, "loss": 76.4968, "step": 3518, "task_loss": 1.2757389545440674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6915286360136617, "compression/movement_sparsity/importance_threshold": -0.0021941870937071195, "compression/movement_sparsity/linear_layer_sparsity": 0.6805871188272877, "compression/movement_sparsity/model_sparsity": 0.6572068575646971, "compression_loss": 73.96563720703125, "distillation_loss": 2.6474199295043945, "epoch": 2.97, "learning_rate": 3.902977364515826e-05, "loss": 77.1935, "step": 3519, "task_loss": 1.2993922233581543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6919140645787898, "compression/movement_sparsity/importance_threshold": -0.0021914455024870418, "compression/movement_sparsity/linear_layer_sparsity": 0.6811151209702037, "compression/movement_sparsity/model_sparsity": 0.6577167212096794, "compression_loss": 74.00634765625, "distillation_loss": 2.6265106201171875, "epoch": 2.97, "learning_rate": 3.902507748661595e-05, "loss": 76.9899, "step": 3520, "task_loss": 0.9183785319328308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.692299171953466, "compression/movement_sparsity/importance_threshold": -0.0021887061959261793, "compression/movement_sparsity/linear_layer_sparsity": 0.681703304387178, "compression/movement_sparsity/model_sparsity": 0.6582846987168167, "compression_loss": 74.0470962524414, "distillation_loss": 2.0118727684020996, "epoch": 2.98, "learning_rate": 3.902038132807363e-05, "loss": 76.413, "step": 3521, "task_loss": 1.5270252227783203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6926839582715747, "compression/movement_sparsity/importance_threshold": -0.002185969173072199, "compression/movement_sparsity/linear_layer_sparsity": 0.6822042625178958, "compression/movement_sparsity/model_sparsity": 0.6587684473946169, "compression_loss": 74.08776092529297, "distillation_loss": 2.8667378425598145, "epoch": 2.98, "learning_rate": 3.9015685169531326e-05, "loss": 76.4947, "step": 3522, "task_loss": 0.9803719520568848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6930684236670016, "compression/movement_sparsity/importance_threshold": -0.0021832344329727575, "compression/movement_sparsity/linear_layer_sparsity": 0.682721616379113, "compression/movement_sparsity/model_sparsity": 0.6592680285591347, "compression_loss": 74.12836456298828, "distillation_loss": 2.791947364807129, "epoch": 2.98, "learning_rate": 3.901098901098901e-05, "loss": 77.1623, "step": 3523, "task_loss": 1.8601365089416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6934525682736313, "compression/movement_sparsity/importance_threshold": -0.002180501974675522, "compression/movement_sparsity/linear_layer_sparsity": 0.6831398327106056, "compression/movement_sparsity/model_sparsity": 0.6596718778730566, "compression_loss": 74.1689453125, "distillation_loss": 3.6679208278656006, "epoch": 2.98, "learning_rate": 3.9006292852446705e-05, "loss": 77.5362, "step": 3524, "task_loss": 1.6940655708312988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6938363922253495, "compression/movement_sparsity/importance_threshold": -0.002177771797228149, "compression/movement_sparsity/linear_layer_sparsity": 0.6835937619241677, "compression/movement_sparsity/model_sparsity": 0.6601102132216832, "compression_loss": 74.20946502685547, "distillation_loss": 3.0077221393585205, "epoch": 2.98, "learning_rate": 3.9001596693904385e-05, "loss": 77.3926, "step": 3525, "task_loss": 1.6996278762817383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6942198956560408, "compression/movement_sparsity/importance_threshold": -0.0021750438996783064, "compression/movement_sparsity/linear_layer_sparsity": 0.6840320108572885, "compression/movement_sparsity/model_sparsity": 0.6605334069557399, "compression_loss": 74.25007629394531, "distillation_loss": 3.4235124588012695, "epoch": 2.98, "learning_rate": 3.899690053536207e-05, "loss": 77.4592, "step": 3526, "task_loss": 1.9623466730117798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6946030786995907, "compression/movement_sparsity/importance_threshold": -0.002172318281073651, "compression/movement_sparsity/linear_layer_sparsity": 0.6846884720581456, "compression/movement_sparsity/model_sparsity": 0.6611673166948367, "compression_loss": 74.2906494140625, "distillation_loss": 4.223805904388428, "epoch": 2.98, "learning_rate": 3.8992204376819764e-05, "loss": 77.7134, "step": 3527, "task_loss": 2.245352268218994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6949859414898842, "compression/movement_sparsity/importance_threshold": -0.002169594940461846, "compression/movement_sparsity/linear_layer_sparsity": 0.6851248250486125, "compression/movement_sparsity/model_sparsity": 0.6615886796177022, "compression_loss": 74.33116912841797, "distillation_loss": 2.150214195251465, "epoch": 2.98, "learning_rate": 3.898750821827745e-05, "loss": 77.3257, "step": 3528, "task_loss": 1.483879804611206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6953684841608067, "compression/movement_sparsity/importance_threshold": -0.002166873876890553, "compression/movement_sparsity/linear_layer_sparsity": 0.685578277295469, "compression/movement_sparsity/model_sparsity": 0.662026554384897, "compression_loss": 74.37163543701172, "distillation_loss": 2.762295722961426, "epoch": 2.98, "learning_rate": 3.8982812059735144e-05, "loss": 77.4439, "step": 3529, "task_loss": 1.0154002904891968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.695750706846243, "compression/movement_sparsity/importance_threshold": -0.0021641550894074355, "compression/movement_sparsity/linear_layer_sparsity": 0.6860538011766196, "compression/movement_sparsity/model_sparsity": 0.6624857425578475, "compression_loss": 74.41211700439453, "distillation_loss": 3.717510223388672, "epoch": 2.98, "learning_rate": 3.897811590119282e-05, "loss": 77.8371, "step": 3530, "task_loss": 2.6904444694519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6961326096800781, "compression/movement_sparsity/importance_threshold": -0.0021614385770601556, "compression/movement_sparsity/linear_layer_sparsity": 0.6865461739066396, "compression/movement_sparsity/model_sparsity": 0.6629612007698756, "compression_loss": 74.45257568359375, "distillation_loss": 2.8650522232055664, "epoch": 2.98, "learning_rate": 3.8973419742650516e-05, "loss": 77.5208, "step": 3531, "task_loss": 2.1447932720184326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6965141927961978, "compression/movement_sparsity/importance_threshold": -0.002158724338896371, "compression/movement_sparsity/linear_layer_sparsity": 0.6870128142829015, "compression/movement_sparsity/model_sparsity": 0.6634118106136593, "compression_loss": 74.49295043945312, "distillation_loss": 2.873107433319092, "epoch": 2.99, "learning_rate": 3.89687235841082e-05, "loss": 77.4228, "step": 3532, "task_loss": 1.2392003536224365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6968954563284865, "compression/movement_sparsity/importance_threshold": -0.0021560123739637485, "compression/movement_sparsity/linear_layer_sparsity": 0.6875649390169449, "compression/movement_sparsity/model_sparsity": 0.6639449681645538, "compression_loss": 74.53333282470703, "distillation_loss": 2.6479430198669434, "epoch": 2.99, "learning_rate": 3.896402742556589e-05, "loss": 77.7501, "step": 3533, "task_loss": 1.6077451705932617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6972764004108299, "compression/movement_sparsity/importance_threshold": -0.002153302681309946, "compression/movement_sparsity/linear_layer_sparsity": 0.6879920746258291, "compression/movement_sparsity/model_sparsity": 0.6643574303512501, "compression_loss": 74.57366180419922, "distillation_loss": 4.929904460906982, "epoch": 2.99, "learning_rate": 3.8959331267023575e-05, "loss": 79.1687, "step": 3534, "task_loss": 3.060385227203369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6976570251771128, "compression/movement_sparsity/importance_threshold": -0.002150595259982629, "compression/movement_sparsity/linear_layer_sparsity": 0.6885543468265306, "compression/movement_sparsity/model_sparsity": 0.6649003867721058, "compression_loss": 74.61400604248047, "distillation_loss": 3.180738687515259, "epoch": 2.99, "learning_rate": 3.895463510848126e-05, "loss": 77.8339, "step": 3535, "task_loss": 1.213765025138855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6980373307612205, "compression/movement_sparsity/importance_threshold": -0.0021478901090294577, "compression/movement_sparsity/linear_layer_sparsity": 0.6891270765222488, "compression/movement_sparsity/model_sparsity": 0.6654534414408535, "compression_loss": 74.6542739868164, "distillation_loss": 3.0136256217956543, "epoch": 2.99, "learning_rate": 3.8949938949938955e-05, "loss": 77.6213, "step": 3536, "task_loss": 1.70624840259552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6984173172970382, "compression/movement_sparsity/importance_threshold": -0.002145187227498093, "compression/movement_sparsity/linear_layer_sparsity": 0.6894479439491622, "compression/movement_sparsity/model_sparsity": 0.6657632860845488, "compression_loss": 74.69451904296875, "distillation_loss": 2.4136743545532227, "epoch": 2.99, "learning_rate": 3.894524279139664e-05, "loss": 77.2557, "step": 3537, "task_loss": 1.9171274900436401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6987969849184509, "compression/movement_sparsity/importance_threshold": -0.002142486614436197, "compression/movement_sparsity/linear_layer_sparsity": 0.689906237408079, "compression/movement_sparsity/model_sparsity": 0.6662058357532762, "compression_loss": 74.73471069335938, "distillation_loss": 2.347219228744507, "epoch": 2.99, "learning_rate": 3.894054663285433e-05, "loss": 77.4227, "step": 3538, "task_loss": 1.6499663591384888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6991763337593437, "compression/movement_sparsity/importance_threshold": -0.0021397882688914327, "compression/movement_sparsity/linear_layer_sparsity": 0.6903359963338431, "compression/movement_sparsity/model_sparsity": 0.6666208311378472, "compression_loss": 74.77484130859375, "distillation_loss": 3.520357370376587, "epoch": 2.99, "learning_rate": 3.8935850474312014e-05, "loss": 77.6399, "step": 3539, "task_loss": 2.5029141902923584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6995553639536018, "compression/movement_sparsity/importance_threshold": -0.0021370921899114626, "compression/movement_sparsity/linear_layer_sparsity": 0.690773982935276, "compression/movement_sparsity/model_sparsity": 0.6670437715521165, "compression_loss": 74.81503295898438, "distillation_loss": 4.499200344085693, "epoch": 2.99, "learning_rate": 3.89311543157697e-05, "loss": 78.5959, "step": 3540, "task_loss": 2.819239616394043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6999340756351107, "compression/movement_sparsity/importance_threshold": -0.0021343983765439445, "compression/movement_sparsity/linear_layer_sparsity": 0.6912296053806424, "compression/movement_sparsity/model_sparsity": 0.6674837419648258, "compression_loss": 74.85511779785156, "distillation_loss": 3.7583212852478027, "epoch": 2.99, "learning_rate": 3.892645815722739e-05, "loss": 77.6519, "step": 3541, "task_loss": 2.638617515563965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7003124689377547, "compression/movement_sparsity/importance_threshold": -0.0021317068278365473, "compression/movement_sparsity/linear_layer_sparsity": 0.6915646148703719, "compression/movement_sparsity/model_sparsity": 0.6678072428479735, "compression_loss": 74.89524841308594, "distillation_loss": 2.9454989433288574, "epoch": 2.99, "learning_rate": 3.892176199868508e-05, "loss": 77.749, "step": 3542, "task_loss": 2.765164613723755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7006905439954199, "compression/movement_sparsity/importance_threshold": -0.0021290175428369243, "compression/movement_sparsity/linear_layer_sparsity": 0.6919612842309465, "compression/movement_sparsity/model_sparsity": 0.6681902853957147, "compression_loss": 74.93534851074219, "distillation_loss": 4.279690742492676, "epoch": 2.99, "learning_rate": 3.8917065840142766e-05, "loss": 77.5911, "step": 3543, "task_loss": 2.734196186065674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7010683009419907, "compression/movement_sparsity/importance_threshold": -0.0021263305205927443, "compression/movement_sparsity/linear_layer_sparsity": 0.6923582755440473, "compression/movement_sparsity/model_sparsity": 0.6685736388359224, "compression_loss": 74.97530364990234, "distillation_loss": 2.567457437515259, "epoch": 3.0, "learning_rate": 3.891236968160045e-05, "loss": 77.7554, "step": 3544, "task_loss": 1.4063729047775269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7014457399113527, "compression/movement_sparsity/importance_threshold": -0.002123645760151665, "compression/movement_sparsity/linear_layer_sparsity": 0.6928565030403766, "compression/movement_sparsity/model_sparsity": 0.6690547506850256, "compression_loss": 75.01527404785156, "distillation_loss": 3.1183176040649414, "epoch": 3.0, "learning_rate": 3.890767352305814e-05, "loss": 77.5146, "step": 3545, "task_loss": 1.3204967975616455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7018228610373907, "compression/movement_sparsity/importance_threshold": -0.002120963260561351, "compression/movement_sparsity/linear_layer_sparsity": 0.6933636378899298, "compression/movement_sparsity/model_sparsity": 0.6695444638923674, "compression_loss": 75.0551986694336, "distillation_loss": 2.9260261058807373, "epoch": 3.0, "learning_rate": 3.890297736451583e-05, "loss": 78.1555, "step": 3546, "task_loss": 1.6122633218765259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7021996644539901, "compression/movement_sparsity/importance_threshold": -0.0021182830208694627, "compression/movement_sparsity/linear_layer_sparsity": 0.6938276191768089, "compression/movement_sparsity/model_sparsity": 0.6699925059946689, "compression_loss": 75.09510803222656, "distillation_loss": 3.9882335662841797, "epoch": 3.0, "learning_rate": 3.889828120597351e-05, "loss": 78.4406, "step": 3547, "task_loss": 2.376908302307129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7025761502950361, "compression/movement_sparsity/importance_threshold": -0.0021156050401236618, "compression/movement_sparsity/linear_layer_sparsity": 0.6942262560250434, "compression/movement_sparsity/model_sparsity": 0.6703774484408161, "compression_loss": 75.13496398925781, "distillation_loss": 2.8581056594848633, "epoch": 3.0, "learning_rate": 3.8893585047431204e-05, "loss": 78.001, "step": 3548, "task_loss": 1.8843731880187988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7029523186944135, "compression/movement_sparsity/importance_threshold": -0.0021129293173716103, "compression/movement_sparsity/linear_layer_sparsity": 0.694675355950713, "compression/movement_sparsity/model_sparsity": 0.670811120402446, "compression_loss": 75.17481231689453, "distillation_loss": 3.310119152069092, "epoch": 3.0, "learning_rate": 3.888888888888889e-05, "loss": 78.7206, "step": 3549, "task_loss": 2.3876023292541504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7037037037037036, "compression/movement_sparsity/importance_threshold": -0.002107584642039406, "compression/movement_sparsity/linear_layer_sparsity": 0.6954562219925151, "compression/movement_sparsity/model_sparsity": 0.6715651612934872, "compression_loss": 75.25475311279297, "distillation_loss": 3.9682743549346924, "epoch": 3.0, "learning_rate": 3.888419273034658e-05, "loss": 137.4576, "step": 3550, "task_loss": 2.486384391784668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7040789205813869, "compression/movement_sparsity/importance_threshold": -0.002104915687554573, "compression/movement_sparsity/linear_layer_sparsity": 0.6957845777967038, "compression/movement_sparsity/model_sparsity": 0.6718822370656615, "compression_loss": 75.29444122314453, "distillation_loss": 2.174891710281372, "epoch": 3.0, "learning_rate": 3.8879496571804263e-05, "loss": 77.9102, "step": 3551, "task_loss": 1.5777196884155273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7044538205529419, "compression/movement_sparsity/importance_threshold": -0.0021022489872541406, "compression/movement_sparsity/linear_layer_sparsity": 0.6962383996927571, "compression/movement_sparsity/model_sparsity": 0.672320468783466, "compression_loss": 75.3340835571289, "distillation_loss": 3.236048698425293, "epoch": 3.0, "learning_rate": 3.887480041326195e-05, "loss": 78.6285, "step": 3552, "task_loss": 1.3573191165924072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7048284037522546, "compression/movement_sparsity/importance_threshold": -0.002099584540185764, "compression/movement_sparsity/linear_layer_sparsity": 0.696685949476634, "compression/movement_sparsity/model_sparsity": 0.6727526438554424, "compression_loss": 75.37374877929688, "distillation_loss": 2.0266623497009277, "epoch": 3.0, "learning_rate": 3.887010425471964e-05, "loss": 78.29, "step": 3553, "task_loss": 1.3495464324951172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7052026703132095, "compression/movement_sparsity/importance_threshold": -0.0020969223453971104, "compression/movement_sparsity/linear_layer_sparsity": 0.6972166584056033, "compression/movement_sparsity/model_sparsity": 0.67326512130005, "compression_loss": 75.41336059570312, "distillation_loss": 2.9104413986206055, "epoch": 3.0, "learning_rate": 3.886540809617733e-05, "loss": 78.2456, "step": 3554, "task_loss": 2.2489724159240723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7055766203696922, "compression/movement_sparsity/importance_threshold": -0.0020942624019358383, "compression/movement_sparsity/linear_layer_sparsity": 0.6975313371895138, "compression/movement_sparsity/model_sparsity": 0.673568989899668, "compression_loss": 75.45289611816406, "distillation_loss": 1.7926362752914429, "epoch": 3.01, "learning_rate": 3.8860711937635016e-05, "loss": 78.2416, "step": 3555, "task_loss": 1.0591405630111694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7059502540555873, "compression/movement_sparsity/importance_threshold": -0.0020916047088496113, "compression/movement_sparsity/linear_layer_sparsity": 0.6978969560175646, "compression/movement_sparsity/model_sparsity": 0.6739220485962002, "compression_loss": 75.49240112304688, "distillation_loss": 2.7379374504089355, "epoch": 3.01, "learning_rate": 3.88560157790927e-05, "loss": 77.9984, "step": 3556, "task_loss": 2.1312997341156006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7063235715047802, "compression/movement_sparsity/importance_threshold": -0.0020889492651860924, "compression/movement_sparsity/linear_layer_sparsity": 0.6984305982917723, "compression/movement_sparsity/model_sparsity": 0.6744373586166132, "compression_loss": 75.53192138671875, "distillation_loss": 2.39157772064209, "epoch": 3.01, "learning_rate": 3.885131962055039e-05, "loss": 78.4857, "step": 3557, "task_loss": 1.3155810832977295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7066965728511565, "compression/movement_sparsity/importance_threshold": -0.002086296069992938, "compression/movement_sparsity/linear_layer_sparsity": 0.698767408330815, "compression/movement_sparsity/model_sparsity": 0.6747625981946659, "compression_loss": 75.57136535644531, "distillation_loss": 4.978303909301758, "epoch": 3.01, "learning_rate": 3.884662346200808e-05, "loss": 79.7478, "step": 3558, "task_loss": 3.466148614883423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7070692582286007, "compression/movement_sparsity/importance_threshold": -0.002083645122317816, "compression/movement_sparsity/linear_layer_sparsity": 0.6992551187112894, "compression/movement_sparsity/model_sparsity": 0.6752335542231984, "compression_loss": 75.61084747314453, "distillation_loss": 2.63938570022583, "epoch": 3.01, "learning_rate": 3.884192730346577e-05, "loss": 78.5779, "step": 3559, "task_loss": 1.249261736869812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7074416277709983, "compression/movement_sparsity/importance_threshold": -0.002080996421208384, "compression/movement_sparsity/linear_layer_sparsity": 0.6996514184226673, "compression/movement_sparsity/model_sparsity": 0.6756162398203299, "compression_loss": 75.65015411376953, "distillation_loss": 3.9911413192749023, "epoch": 3.01, "learning_rate": 3.8837231144923454e-05, "loss": 78.3296, "step": 3560, "task_loss": 2.7430620193481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7078136816122341, "compression/movement_sparsity/importance_threshold": -0.002078349965712308, "compression/movement_sparsity/linear_layer_sparsity": 0.7001234604468682, "compression/movement_sparsity/model_sparsity": 0.6760720657488284, "compression_loss": 75.68952178955078, "distillation_loss": 4.051279067993164, "epoch": 3.01, "learning_rate": 3.883253498638114e-05, "loss": 78.328, "step": 3561, "task_loss": 2.8960814476013184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7081854198861938, "compression/movement_sparsity/importance_threshold": -0.0020757057548772447, "compression/movement_sparsity/linear_layer_sparsity": 0.7003806170461032, "compression/movement_sparsity/model_sparsity": 0.6763203882277735, "compression_loss": 75.72885131835938, "distillation_loss": 2.0273356437683105, "epoch": 3.01, "learning_rate": 3.8827838827838833e-05, "loss": 78.6273, "step": 3562, "task_loss": 1.7645612955093384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7085568427267619, "compression/movement_sparsity/importance_threshold": -0.002073063787750861, "compression/movement_sparsity/linear_layer_sparsity": 0.7008293830950789, "compression/movement_sparsity/model_sparsity": 0.676753737782401, "compression_loss": 75.7680892944336, "distillation_loss": 2.4545154571533203, "epoch": 3.01, "learning_rate": 3.882314266929652e-05, "loss": 78.7097, "step": 3563, "task_loss": 1.9368609189987183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7089279502678241, "compression/movement_sparsity/importance_threshold": -0.0020704240633808157, "compression/movement_sparsity/linear_layer_sparsity": 0.7011164097342418, "compression/movement_sparsity/model_sparsity": 0.6770309041735114, "compression_loss": 75.80732727050781, "distillation_loss": 2.1269760131835938, "epoch": 3.01, "learning_rate": 3.8818446510754206e-05, "loss": 78.4735, "step": 3564, "task_loss": 1.340146780014038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.709298742643265, "compression/movement_sparsity/importance_threshold": -0.0020677865808147724, "compression/movement_sparsity/linear_layer_sparsity": 0.701614017173854, "compression/movement_sparsity/model_sparsity": 0.6775114172667535, "compression_loss": 75.84651947021484, "distillation_loss": 1.8742012977600098, "epoch": 3.01, "learning_rate": 3.881375035221189e-05, "loss": 78.0446, "step": 3565, "task_loss": 0.8562831878662109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7096692199869703, "compression/movement_sparsity/importance_threshold": -0.0020651513391003904, "compression/movement_sparsity/linear_layer_sparsity": 0.7020995096591481, "compression/movement_sparsity/model_sparsity": 0.6779802315916281, "compression_loss": 75.88569641113281, "distillation_loss": 3.4042015075683594, "epoch": 3.01, "learning_rate": 3.880905419366958e-05, "loss": 78.981, "step": 3566, "task_loss": 2.506352424621582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7100393824328246, "compression/movement_sparsity/importance_threshold": -0.0020625183372853352, "compression/movement_sparsity/linear_layer_sparsity": 0.7025151861429343, "compression/movement_sparsity/model_sparsity": 0.6783816283094258, "compression_loss": 75.9248046875, "distillation_loss": 3.2751893997192383, "epoch": 3.02, "learning_rate": 3.880435803512727e-05, "loss": 78.7918, "step": 3567, "task_loss": 1.9876375198364258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7104092301147134, "compression/movement_sparsity/importance_threshold": -0.002059887574417267, "compression/movement_sparsity/linear_layer_sparsity": 0.7029455055045774, "compression/movement_sparsity/model_sparsity": 0.6787971648771792, "compression_loss": 75.96391296386719, "distillation_loss": 2.3648595809936523, "epoch": 3.02, "learning_rate": 3.879966187658495e-05, "loss": 79.2671, "step": 3568, "task_loss": 3.3742454051971436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7107787631665219, "compression/movement_sparsity/importance_threshold": -0.0020572590495438444, "compression/movement_sparsity/linear_layer_sparsity": 0.7034879597386681, "compression/movement_sparsity/model_sparsity": 0.6793209841395444, "compression_loss": 76.00296020507812, "distillation_loss": 3.724949359893799, "epoch": 3.02, "learning_rate": 3.8794965718042645e-05, "loss": 78.7935, "step": 3569, "task_loss": 2.0544230937957764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7111479817221347, "compression/movement_sparsity/importance_threshold": -0.002054632761712736, "compression/movement_sparsity/linear_layer_sparsity": 0.7039021814740026, "compression/movement_sparsity/model_sparsity": 0.6797209760839752, "compression_loss": 76.04196166992188, "distillation_loss": 1.857201099395752, "epoch": 3.02, "learning_rate": 3.879026955950033e-05, "loss": 78.543, "step": 3570, "task_loss": 2.0739026069641113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7115168859154377, "compression/movement_sparsity/importance_threshold": -0.0020520087099715973, "compression/movement_sparsity/linear_layer_sparsity": 0.7044921892886251, "compression/movement_sparsity/model_sparsity": 0.6802907153150891, "compression_loss": 76.0809326171875, "distillation_loss": 3.771374225616455, "epoch": 3.02, "learning_rate": 3.878557340095802e-05, "loss": 79.2204, "step": 3571, "task_loss": 2.6723363399505615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7118854758803155, "compression/movement_sparsity/importance_threshold": -0.0020493868933680933, "compression/movement_sparsity/linear_layer_sparsity": 0.7050214315449753, "compression/movement_sparsity/model_sparsity": 0.680801776471794, "compression_loss": 76.11990356445312, "distillation_loss": 2.848438024520874, "epoch": 3.02, "learning_rate": 3.878087724241571e-05, "loss": 79.2632, "step": 3572, "task_loss": 2.4339377880096436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7122537517506536, "compression/movement_sparsity/importance_threshold": -0.0020467673109498836, "compression/movement_sparsity/linear_layer_sparsity": 0.7054781867862671, "compression/movement_sparsity/model_sparsity": 0.6812428407654039, "compression_loss": 76.1588134765625, "distillation_loss": 4.228837490081787, "epoch": 3.02, "learning_rate": 3.877618108387339e-05, "loss": 79.3805, "step": 3573, "task_loss": 3.396289110183716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7126217136603368, "compression/movement_sparsity/importance_threshold": -0.0020441499617646344, "compression/movement_sparsity/linear_layer_sparsity": 0.7058975597620203, "compression/movement_sparsity/model_sparsity": 0.6816478069892979, "compression_loss": 76.19766235351562, "distillation_loss": 2.8888440132141113, "epoch": 3.02, "learning_rate": 3.877148492533108e-05, "loss": 78.857, "step": 3574, "task_loss": 1.8292564153671265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7129893617432504, "compression/movement_sparsity/importance_threshold": -0.0020415348448600025, "compression/movement_sparsity/linear_layer_sparsity": 0.7063025283432695, "compression/movement_sparsity/model_sparsity": 0.682038863653952, "compression_loss": 76.23652648925781, "distillation_loss": 2.794372797012329, "epoch": 3.02, "learning_rate": 3.876678876678877e-05, "loss": 79.2689, "step": 3575, "task_loss": 1.8897600173950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7133566961332795, "compression/movement_sparsity/importance_threshold": -0.002038921959283654, "compression/movement_sparsity/linear_layer_sparsity": 0.7069902666358355, "compression/movement_sparsity/model_sparsity": 0.6827029760204381, "compression_loss": 76.27535247802734, "distillation_loss": 2.8937387466430664, "epoch": 3.02, "learning_rate": 3.8762092608246456e-05, "loss": 78.8188, "step": 3576, "task_loss": 3.074411630630493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7137237169643091, "compression/movement_sparsity/importance_threshold": -0.002036311304083249, "compression/movement_sparsity/linear_layer_sparsity": 0.707524016227552, "compression/movement_sparsity/model_sparsity": 0.6832183896716733, "compression_loss": 76.31414031982422, "distillation_loss": 3.076137065887451, "epoch": 3.02, "learning_rate": 3.875739644970414e-05, "loss": 79.2955, "step": 3577, "task_loss": 1.1422251462936401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7140904243702249, "compression/movement_sparsity/importance_threshold": -0.0020337028783064476, "compression/movement_sparsity/linear_layer_sparsity": 0.7079343268359021, "compression/movement_sparsity/model_sparsity": 0.6836146048483634, "compression_loss": 76.35285186767578, "distillation_loss": 3.365786552429199, "epoch": 3.02, "learning_rate": 3.875270029116183e-05, "loss": 79.3939, "step": 3578, "task_loss": 0.967184841632843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7144568184849114, "compression/movement_sparsity/importance_threshold": -0.002031096681000914, "compression/movement_sparsity/linear_layer_sparsity": 0.7084079190018957, "compression/movement_sparsity/model_sparsity": 0.6840719276665151, "compression_loss": 76.39154815673828, "distillation_loss": 3.7945215702056885, "epoch": 3.03, "learning_rate": 3.874800413261952e-05, "loss": 80.0006, "step": 3579, "task_loss": 3.1342015266418457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7148228994422541, "compression/movement_sparsity/importance_threshold": -0.0020284927112143095, "compression/movement_sparsity/linear_layer_sparsity": 0.7088305711237488, "compression/movement_sparsity/model_sparsity": 0.6844800603877527, "compression_loss": 76.43022155761719, "distillation_loss": 2.576021194458008, "epoch": 3.03, "learning_rate": 3.874330797407721e-05, "loss": 79.2489, "step": 3580, "task_loss": 1.6111080646514893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7151886673761378, "compression/movement_sparsity/importance_threshold": -0.0020258909679942956, "compression/movement_sparsity/linear_layer_sparsity": 0.7092600080969867, "compression/movement_sparsity/model_sparsity": 0.6848947448798572, "compression_loss": 76.4688949584961, "distillation_loss": 3.1807336807250977, "epoch": 3.03, "learning_rate": 3.8738611815534894e-05, "loss": 79.4267, "step": 3581, "task_loss": 2.066669225692749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7155541224204482, "compression/movement_sparsity/importance_threshold": -0.0020232914503885337, "compression/movement_sparsity/linear_layer_sparsity": 0.7097021562329245, "compression/movement_sparsity/model_sparsity": 0.6853217038671189, "compression_loss": 76.50749969482422, "distillation_loss": 3.7719128131866455, "epoch": 3.03, "learning_rate": 3.873391565699258e-05, "loss": 79.6982, "step": 3582, "task_loss": 1.8005620241165161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.71591926470907, "compression/movement_sparsity/importance_threshold": -0.0020206941574446866, "compression/movement_sparsity/linear_layer_sparsity": 0.7100754899974359, "compression/movement_sparsity/model_sparsity": 0.6856822124683102, "compression_loss": 76.54608917236328, "distillation_loss": 3.802804946899414, "epoch": 3.03, "learning_rate": 3.872921949845027e-05, "loss": 79.9841, "step": 3583, "task_loss": 2.760200023651123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7162840943758886, "compression/movement_sparsity/importance_threshold": -0.0020180990882104144, "compression/movement_sparsity/linear_layer_sparsity": 0.7106401589558321, "compression/movement_sparsity/model_sparsity": 0.6862274833108606, "compression_loss": 76.58467864990234, "distillation_loss": 5.526923179626465, "epoch": 3.03, "learning_rate": 3.872452333990796e-05, "loss": 79.7535, "step": 3584, "task_loss": 2.649855375289917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7166486115547885, "compression/movement_sparsity/importance_threshold": -0.0020155062417333844, "compression/movement_sparsity/linear_layer_sparsity": 0.7109369157157859, "compression/movement_sparsity/model_sparsity": 0.6865140455631793, "compression_loss": 76.62318420410156, "distillation_loss": 3.438778877258301, "epoch": 3.03, "learning_rate": 3.871982718136564e-05, "loss": 79.9915, "step": 3585, "task_loss": 2.7004902362823486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7170128163796559, "compression/movement_sparsity/importance_threshold": -0.0020129156170612507, "compression/movement_sparsity/linear_layer_sparsity": 0.7112272215010486, "compression/movement_sparsity/model_sparsity": 0.6867943784516333, "compression_loss": 76.6617431640625, "distillation_loss": 4.084939956665039, "epoch": 3.03, "learning_rate": 3.871513102282333e-05, "loss": 79.9178, "step": 3586, "task_loss": 2.346627950668335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.717376708984375, "compression/movement_sparsity/importance_threshold": -0.0020103272132416805, "compression/movement_sparsity/linear_layer_sparsity": 0.7116363993134732, "compression/movement_sparsity/model_sparsity": 0.6871894997474229, "compression_loss": 76.7001953125, "distillation_loss": 1.6830846071243286, "epoch": 3.03, "learning_rate": 3.871043486428102e-05, "loss": 79.3012, "step": 3587, "task_loss": 2.4496893882751465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7177402895028313, "compression/movement_sparsity/importance_threshold": -0.002007741029322335, "compression/movement_sparsity/linear_layer_sparsity": 0.7119228774409249, "compression/movement_sparsity/model_sparsity": 0.6874661364698867, "compression_loss": 76.73859405517578, "distillation_loss": 2.544583797454834, "epoch": 3.03, "learning_rate": 3.870573870573871e-05, "loss": 79.4548, "step": 3588, "task_loss": 1.2756794691085815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7181035580689104, "compression/movement_sparsity/importance_threshold": -0.0020051570643508715, "compression/movement_sparsity/linear_layer_sparsity": 0.7123222178150499, "compression/movement_sparsity/model_sparsity": 0.6878517582736459, "compression_loss": 76.7770004272461, "distillation_loss": 2.902374505996704, "epoch": 3.03, "learning_rate": 3.87010425471964e-05, "loss": 79.2501, "step": 3589, "task_loss": 1.5532714128494263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7184665148164965, "compression/movement_sparsity/importance_threshold": -0.002002575317374959, "compression/movement_sparsity/linear_layer_sparsity": 0.7127679074287755, "compression/movement_sparsity/model_sparsity": 0.6882821370780384, "compression_loss": 76.8154296875, "distillation_loss": 3.2264153957366943, "epoch": 3.03, "learning_rate": 3.869634638865408e-05, "loss": 79.6581, "step": 3590, "task_loss": 1.8151187896728516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7188291598794756, "compression/movement_sparsity/importance_threshold": -0.001999995787442253, "compression/movement_sparsity/linear_layer_sparsity": 0.7132070149019661, "compression/movement_sparsity/model_sparsity": 0.6887061598586725, "compression_loss": 76.85374450683594, "distillation_loss": 3.451051712036133, "epoch": 3.04, "learning_rate": 3.869165023011177e-05, "loss": 80.1288, "step": 3591, "task_loss": 2.304154872894287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7191914933917322, "compression/movement_sparsity/importance_threshold": -0.00199741847360042, "compression/movement_sparsity/linear_layer_sparsity": 0.7135565599520438, "compression/movement_sparsity/model_sparsity": 0.6890436969609536, "compression_loss": 76.89208984375, "distillation_loss": 3.76478910446167, "epoch": 3.04, "learning_rate": 3.868695407156946e-05, "loss": 79.6386, "step": 3592, "task_loss": 2.6726183891296387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.719553515487152, "compression/movement_sparsity/importance_threshold": -0.001994843374897119, "compression/movement_sparsity/linear_layer_sparsity": 0.7138589806916246, "compression/movement_sparsity/model_sparsity": 0.6893357286177747, "compression_loss": 76.93038940429688, "distillation_loss": 2.9743919372558594, "epoch": 3.04, "learning_rate": 3.868225791302715e-05, "loss": 80.0631, "step": 3593, "task_loss": 1.4283536672592163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7199152262996196, "compression/movement_sparsity/importance_threshold": -0.0019922704903800135, "compression/movement_sparsity/linear_layer_sparsity": 0.714378683613866, "compression/movement_sparsity/model_sparsity": 0.689837578145844, "compression_loss": 76.9686050415039, "distillation_loss": 4.110658168792725, "epoch": 3.04, "learning_rate": 3.867756175448483e-05, "loss": 79.7071, "step": 3594, "task_loss": 2.399956703186035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7202766259630206, "compression/movement_sparsity/importance_threshold": -0.0019896998190967637, "compression/movement_sparsity/linear_layer_sparsity": 0.7148099807572552, "compression/movement_sparsity/model_sparsity": 0.6902540589055326, "compression_loss": 77.0068130493164, "distillation_loss": 4.02730131149292, "epoch": 3.04, "learning_rate": 3.867286559594252e-05, "loss": 80.3342, "step": 3595, "task_loss": 2.426424026489258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7206377146112397, "compression/movement_sparsity/importance_threshold": -0.001987131360095034, "compression/movement_sparsity/linear_layer_sparsity": 0.7152009384415323, "compression/movement_sparsity/model_sparsity": 0.6906315859906281, "compression_loss": 77.04498291015625, "distillation_loss": 3.765524387359619, "epoch": 3.04, "learning_rate": 3.866816943740021e-05, "loss": 80.2511, "step": 3596, "task_loss": 2.214118003845215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7209984923781626, "compression/movement_sparsity/importance_threshold": -0.0019845651124224825, "compression/movement_sparsity/linear_layer_sparsity": 0.7155646136302585, "compression/movement_sparsity/model_sparsity": 0.6909827678178259, "compression_loss": 77.08304595947266, "distillation_loss": 2.9490489959716797, "epoch": 3.04, "learning_rate": 3.8663473278857896e-05, "loss": 80.245, "step": 3597, "task_loss": 3.5179271697998047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7213589593976738, "compression/movement_sparsity/importance_threshold": -0.001982001075126776, "compression/movement_sparsity/linear_layer_sparsity": 0.7159031526736083, "compression/movement_sparsity/model_sparsity": 0.6913096770035687, "compression_loss": 77.12120819091797, "distillation_loss": 2.673316717147827, "epoch": 3.04, "learning_rate": 3.865877712031558e-05, "loss": 79.7594, "step": 3598, "task_loss": 2.7611522674560547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7217191158036588, "compression/movement_sparsity/importance_threshold": -0.001979439247255573, "compression/movement_sparsity/linear_layer_sparsity": 0.7162924767469192, "compression/movement_sparsity/model_sparsity": 0.6916856265972604, "compression_loss": 77.15927124023438, "distillation_loss": 3.387247085571289, "epoch": 3.04, "learning_rate": 3.865408096177327e-05, "loss": 80.185, "step": 3599, "task_loss": 2.1747686862945557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.722078961730003, "compression/movement_sparsity/importance_threshold": -0.001976879627856534, "compression/movement_sparsity/linear_layer_sparsity": 0.7167712916983373, "compression/movement_sparsity/model_sparsity": 0.6921479927820902, "compression_loss": 77.19727325439453, "distillation_loss": 2.936380386352539, "epoch": 3.04, "learning_rate": 3.864938480323096e-05, "loss": 79.8233, "step": 3600, "task_loss": 2.0573222637176514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7224384973105908, "compression/movement_sparsity/importance_threshold": -0.001974322215977325, "compression/movement_sparsity/linear_layer_sparsity": 0.7171789074448016, "compression/movement_sparsity/model_sparsity": 0.6925416056736908, "compression_loss": 77.23523712158203, "distillation_loss": 2.7562310695648193, "epoch": 3.04, "learning_rate": 3.864468864468865e-05, "loss": 80.0178, "step": 3601, "task_loss": 3.1290736198425293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7227977226793083, "compression/movement_sparsity/importance_threshold": -0.001971767010665603, "compression/movement_sparsity/linear_layer_sparsity": 0.7175835302251894, "compression/movement_sparsity/model_sparsity": 0.692932328416807, "compression_loss": 77.27323150634766, "distillation_loss": 3.1218841075897217, "epoch": 3.04, "learning_rate": 3.8639992486146335e-05, "loss": 80.8203, "step": 3602, "task_loss": 2.320037841796875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7231566379700398, "compression/movement_sparsity/importance_threshold": -0.0019692140109690342, "compression/movement_sparsity/linear_layer_sparsity": 0.7179439024194804, "compression/movement_sparsity/model_sparsity": 0.6932803207175896, "compression_loss": 77.31119537353516, "distillation_loss": 3.804900646209717, "epoch": 3.05, "learning_rate": 3.863529632760402e-05, "loss": 80.4511, "step": 3603, "task_loss": 2.0185909271240234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7235152433166709, "compression/movement_sparsity/importance_threshold": -0.001966663215935278, "compression/movement_sparsity/linear_layer_sparsity": 0.7182717931811313, "compression/movement_sparsity/model_sparsity": 0.6935969474228678, "compression_loss": 77.34909057617188, "distillation_loss": 3.6690030097961426, "epoch": 3.05, "learning_rate": 3.863060016906171e-05, "loss": 80.5137, "step": 3604, "task_loss": 2.2109761238098145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7238735388530865, "compression/movement_sparsity/importance_threshold": -0.001964114624611998, "compression/movement_sparsity/linear_layer_sparsity": 0.7185627190231111, "compression/movement_sparsity/model_sparsity": 0.693877879067183, "compression_loss": 77.38697814941406, "distillation_loss": 2.8484833240509033, "epoch": 3.05, "learning_rate": 3.86259040105194e-05, "loss": 79.9221, "step": 3605, "task_loss": 1.2904871702194214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7242315247131721, "compression/movement_sparsity/importance_threshold": -0.0019615682360468534, "compression/movement_sparsity/linear_layer_sparsity": 0.7189379606544442, "compression/movement_sparsity/model_sparsity": 0.6942402299941014, "compression_loss": 77.4248275756836, "distillation_loss": 3.1338446140289307, "epoch": 3.05, "learning_rate": 3.862120785197709e-05, "loss": 80.4533, "step": 3606, "task_loss": 1.4031133651733398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7245892010308121, "compression/movement_sparsity/importance_threshold": -0.00195902404928751, "compression/movement_sparsity/linear_layer_sparsity": 0.7193546745408146, "compression/movement_sparsity/model_sparsity": 0.6946426284765133, "compression_loss": 77.46267700195312, "distillation_loss": 5.512173652648926, "epoch": 3.05, "learning_rate": 3.861651169343477e-05, "loss": 81.1316, "step": 3607, "task_loss": 2.3176984786987305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7249465679398923, "compression/movement_sparsity/importance_threshold": -0.001956482063381628, "compression/movement_sparsity/linear_layer_sparsity": 0.7197624453014583, "compression/movement_sparsity/model_sparsity": 0.6950363910570792, "compression_loss": 77.50045776367188, "distillation_loss": 3.249770164489746, "epoch": 3.05, "learning_rate": 3.861181553489246e-05, "loss": 80.5808, "step": 3608, "task_loss": 1.5010203123092651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.725303625574298, "compression/movement_sparsity/importance_threshold": -0.0019539422773768656, "compression/movement_sparsity/linear_layer_sparsity": 0.7200620280891417, "compression/movement_sparsity/model_sparsity": 0.6953256822543812, "compression_loss": 77.53821563720703, "distillation_loss": 2.6263980865478516, "epoch": 3.05, "learning_rate": 3.8607119376350146e-05, "loss": 80.6916, "step": 3609, "task_loss": 1.7960419654846191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7256603740679137, "compression/movement_sparsity/importance_threshold": -0.0019514046903208885, "compression/movement_sparsity/linear_layer_sparsity": 0.7203529300827861, "compression/movement_sparsity/model_sparsity": 0.6956065908696248, "compression_loss": 77.57591247558594, "distillation_loss": 3.2543461322784424, "epoch": 3.05, "learning_rate": 3.860242321780784e-05, "loss": 80.3773, "step": 3610, "task_loss": 1.2701221704483032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7260168135546251, "compression/movement_sparsity/importance_threshold": -0.0019488693012613568, "compression/movement_sparsity/linear_layer_sparsity": 0.7206372618600633, "compression/movement_sparsity/model_sparsity": 0.6958811549756456, "compression_loss": 77.61361694335938, "distillation_loss": 3.111206531524658, "epoch": 3.05, "learning_rate": 3.859772705926552e-05, "loss": 80.2321, "step": 3611, "task_loss": 1.3935354948043823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7263729441683169, "compression/movement_sparsity/importance_threshold": -0.0019463361092459351, "compression/movement_sparsity/linear_layer_sparsity": 0.7209842432140993, "compression/movement_sparsity/model_sparsity": 0.696216216452731, "compression_loss": 77.6512451171875, "distillation_loss": 5.763064384460449, "epoch": 3.05, "learning_rate": 3.859303090072321e-05, "loss": 81.1125, "step": 3612, "task_loss": 3.562041997909546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7267287660428745, "compression/movement_sparsity/importance_threshold": -0.0019438051133222828, "compression/movement_sparsity/linear_layer_sparsity": 0.7213412647172847, "compression/movement_sparsity/model_sparsity": 0.6965609731689554, "compression_loss": 77.6888656616211, "distillation_loss": 3.02589750289917, "epoch": 3.05, "learning_rate": 3.85883347421809e-05, "loss": 81.4022, "step": 3613, "task_loss": 1.8227323293685913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7270842793121828, "compression/movement_sparsity/importance_threshold": -0.0019412763125380636, "compression/movement_sparsity/linear_layer_sparsity": 0.7217602203471707, "compression/movement_sparsity/model_sparsity": 0.6969655363840965, "compression_loss": 77.7264633178711, "distillation_loss": 3.127047300338745, "epoch": 3.05, "learning_rate": 3.8583638583638584e-05, "loss": 80.4167, "step": 3614, "task_loss": 1.7973270416259766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7274394841101275, "compression/movement_sparsity/importance_threshold": -0.001938749705940935, "compression/movement_sparsity/linear_layer_sparsity": 0.7221312765956635, "compression/movement_sparsity/model_sparsity": 0.6973238457089511, "compression_loss": 77.76399230957031, "distillation_loss": 2.394876480102539, "epoch": 3.06, "learning_rate": 3.857894242509627e-05, "loss": 80.6622, "step": 3615, "task_loss": 1.9498845338821411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7277943805705929, "compression/movement_sparsity/importance_threshold": -0.0019362252925785653, "compression/movement_sparsity/linear_layer_sparsity": 0.722390281440882, "compression/movement_sparsity/model_sparsity": 0.6975739529409443, "compression_loss": 77.80156707763672, "distillation_loss": 2.980006694793701, "epoch": 3.06, "learning_rate": 3.857424626655396e-05, "loss": 80.4107, "step": 3616, "task_loss": 2.066474437713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7281489688274649, "compression/movement_sparsity/importance_threshold": -0.0019337030714986101, "compression/movement_sparsity/linear_layer_sparsity": 0.7227567945815055, "compression/movement_sparsity/model_sparsity": 0.6979278752276611, "compression_loss": 77.83901977539062, "distillation_loss": 2.9897866249084473, "epoch": 3.06, "learning_rate": 3.856955010801165e-05, "loss": 80.479, "step": 3617, "task_loss": 2.2164440155029297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7285032490146282, "compression/movement_sparsity/importance_threshold": -0.0019311830417487368, "compression/movement_sparsity/linear_layer_sparsity": 0.723155991865619, "compression/movement_sparsity/model_sparsity": 0.6983133588569908, "compression_loss": 77.87645721435547, "distillation_loss": 3.040661096572876, "epoch": 3.06, "learning_rate": 3.8564853949469336e-05, "loss": 80.82, "step": 3618, "task_loss": 2.046982526779175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7288572212659681, "compression/movement_sparsity/importance_threshold": -0.0019286652023766029, "compression/movement_sparsity/linear_layer_sparsity": 0.723475893434954, "compression/movement_sparsity/model_sparsity": 0.6986222708232867, "compression_loss": 77.91387176513672, "distillation_loss": 2.612502098083496, "epoch": 3.06, "learning_rate": 3.856015779092703e-05, "loss": 81.8187, "step": 3619, "task_loss": 0.7493927478790283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7292108857153698, "compression/movement_sparsity/importance_threshold": -0.001926149552429872, "compression/movement_sparsity/linear_layer_sparsity": 0.7238542830465428, "compression/movement_sparsity/model_sparsity": 0.6989876615876548, "compression_loss": 77.95121002197266, "distillation_loss": 4.236021041870117, "epoch": 3.06, "learning_rate": 3.855546163238471e-05, "loss": 81.2748, "step": 3620, "task_loss": 2.453688621520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7295642424967181, "compression/movement_sparsity/importance_threshold": -0.0019236360909562072, "compression/movement_sparsity/linear_layer_sparsity": 0.7241380543879411, "compression/movement_sparsity/model_sparsity": 0.6992616845104933, "compression_loss": 77.98860931396484, "distillation_loss": 4.305092811584473, "epoch": 3.06, "learning_rate": 3.8550765473842395e-05, "loss": 81.9785, "step": 3621, "task_loss": 2.4622764587402344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7299172917438987, "compression/movement_sparsity/importance_threshold": -0.0019211248170032677, "compression/movement_sparsity/linear_layer_sparsity": 0.7243878895482476, "compression/movement_sparsity/model_sparsity": 0.6995029370644605, "compression_loss": 78.02597045898438, "distillation_loss": 3.942403793334961, "epoch": 3.06, "learning_rate": 3.854606931530009e-05, "loss": 81.0987, "step": 3622, "task_loss": 1.9447221755981445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7302700335907963, "compression/movement_sparsity/importance_threshold": -0.0019186157296187173, "compression/movement_sparsity/linear_layer_sparsity": 0.7247401533085464, "compression/movement_sparsity/model_sparsity": 0.6998430994809028, "compression_loss": 78.06321716308594, "distillation_loss": 2.7529263496398926, "epoch": 3.06, "learning_rate": 3.8541373156757775e-05, "loss": 80.8573, "step": 3623, "task_loss": 1.8107905387878418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7306224681712964, "compression/movement_sparsity/importance_threshold": -0.001916108827850216, "compression/movement_sparsity/linear_layer_sparsity": 0.7251364053232537, "compression/movement_sparsity/model_sparsity": 0.7002257390198913, "compression_loss": 78.10044860839844, "distillation_loss": 3.3587710857391357, "epoch": 3.06, "learning_rate": 3.853667699821546e-05, "loss": 81.4585, "step": 3624, "task_loss": 2.812657117843628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7309745956192836, "compression/movement_sparsity/importance_threshold": -0.0019136041107454295, "compression/movement_sparsity/linear_layer_sparsity": 0.725565568040636, "compression/movement_sparsity/model_sparsity": 0.7006401586776726, "compression_loss": 78.13768768310547, "distillation_loss": 3.1307148933410645, "epoch": 3.06, "learning_rate": 3.853198083967315e-05, "loss": 81.4581, "step": 3625, "task_loss": 1.4608787298202515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7313264160686437, "compression/movement_sparsity/importance_threshold": -0.0019111015773520144, "compression/movement_sparsity/linear_layer_sparsity": 0.7259479641725505, "compression/movement_sparsity/model_sparsity": 0.7010094183260677, "compression_loss": 78.17484283447266, "distillation_loss": 3.7934482097625732, "epoch": 3.07, "learning_rate": 3.852728468113084e-05, "loss": 81.5024, "step": 3626, "task_loss": 2.244081497192383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7316779296532612, "compression/movement_sparsity/importance_threshold": -0.001908601226717637, "compression/movement_sparsity/linear_layer_sparsity": 0.7263423679412744, "compression/movement_sparsity/model_sparsity": 0.701390273112008, "compression_loss": 78.21199035644531, "distillation_loss": 2.209555149078369, "epoch": 3.07, "learning_rate": 3.852258852258853e-05, "loss": 80.8879, "step": 3627, "task_loss": 1.2444654703140259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7320291365070214, "compression/movement_sparsity/importance_threshold": -0.0019061030578899586, "compression/movement_sparsity/linear_layer_sparsity": 0.7266099581871908, "compression/movement_sparsity/model_sparsity": 0.7016486708097732, "compression_loss": 78.24909973144531, "distillation_loss": 3.171116828918457, "epoch": 3.07, "learning_rate": 3.8517892364046206e-05, "loss": 81.1546, "step": 3628, "task_loss": 2.837592601776123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.73238003676381, "compression/movement_sparsity/importance_threshold": -0.0019036070699166366, "compression/movement_sparsity/linear_layer_sparsity": 0.7271135754072914, "compression/movement_sparsity/model_sparsity": 0.7021349872290557, "compression_loss": 78.28617095947266, "distillation_loss": 3.9245336055755615, "epoch": 3.07, "learning_rate": 3.85131962055039e-05, "loss": 81.3603, "step": 3629, "task_loss": 2.1953577995300293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7327306305575114, "compression/movement_sparsity/importance_threshold": -0.0019011132618453382, "compression/movement_sparsity/linear_layer_sparsity": 0.7274727909573218, "compression/movement_sparsity/model_sparsity": 0.7024818626198662, "compression_loss": 78.32323455810547, "distillation_loss": 3.0762453079223633, "epoch": 3.07, "learning_rate": 3.8508500046961586e-05, "loss": 80.7824, "step": 3630, "task_loss": 1.3634660243988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7330809180220114, "compression/movement_sparsity/importance_threshold": -0.001898621632723722, "compression/movement_sparsity/linear_layer_sparsity": 0.7278509540097255, "compression/movement_sparsity/model_sparsity": 0.7028470346080542, "compression_loss": 78.36022186279297, "distillation_loss": 3.711909294128418, "epoch": 3.07, "learning_rate": 3.850380388841928e-05, "loss": 81.9178, "step": 3631, "task_loss": 2.838240623474121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7334308992911944, "compression/movement_sparsity/importance_threshold": -0.0018961321815994525, "compression/movement_sparsity/linear_layer_sparsity": 0.7283088062744397, "compression/movement_sparsity/model_sparsity": 0.7032891582389572, "compression_loss": 78.39717102050781, "distillation_loss": 4.17795991897583, "epoch": 3.07, "learning_rate": 3.849910772987696e-05, "loss": 81.7661, "step": 3632, "task_loss": 1.6887906789779663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7337805744989463, "compression/movement_sparsity/importance_threshold": -0.0018936449075201882, "compression/movement_sparsity/linear_layer_sparsity": 0.7286358981168591, "compression/movement_sparsity/model_sparsity": 0.7036050134703373, "compression_loss": 78.43412017822266, "distillation_loss": 4.000066757202148, "epoch": 3.07, "learning_rate": 3.849441157133465e-05, "loss": 81.8211, "step": 3633, "task_loss": 2.0126094818115234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7341299437791516, "compression/movement_sparsity/importance_threshold": -0.0018911598095335946, "compression/movement_sparsity/linear_layer_sparsity": 0.7289354689803749, "compression/movement_sparsity/model_sparsity": 0.7038942931531035, "compression_loss": 78.47105407714844, "distillation_loss": 4.185944557189941, "epoch": 3.07, "learning_rate": 3.848971541279234e-05, "loss": 81.7576, "step": 3634, "task_loss": 2.5888030529022217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.734479007265696, "compression/movement_sparsity/importance_threshold": -0.00188867688668733, "compression/movement_sparsity/linear_layer_sparsity": 0.7293808604899097, "compression/movement_sparsity/model_sparsity": 0.7043243840941013, "compression_loss": 78.50785827636719, "distillation_loss": 3.634857654571533, "epoch": 3.07, "learning_rate": 3.8485019254250024e-05, "loss": 81.2284, "step": 3635, "task_loss": 1.8804690837860107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7348277650924642, "compression/movement_sparsity/importance_threshold": -0.0018861961380290592, "compression/movement_sparsity/linear_layer_sparsity": 0.729704446627044, "compression/movement_sparsity/model_sparsity": 0.7046368540519576, "compression_loss": 78.54462432861328, "distillation_loss": 3.078972816467285, "epoch": 3.07, "learning_rate": 3.848032309570772e-05, "loss": 81.2204, "step": 3636, "task_loss": 1.6642597913742065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7351762173933416, "compression/movement_sparsity/importance_threshold": -0.0018837175626064415, "compression/movement_sparsity/linear_layer_sparsity": 0.730141646233413, "compression/movement_sparsity/model_sparsity": 0.7050590345068645, "compression_loss": 78.5814208984375, "distillation_loss": 2.2603063583374023, "epoch": 3.07, "learning_rate": 3.84756269371654e-05, "loss": 81.6838, "step": 3637, "task_loss": 1.0078237056732178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7355243643022131, "compression/movement_sparsity/importance_threshold": -0.0018812411594671414, "compression/movement_sparsity/linear_layer_sparsity": 0.7304845495221175, "compression/movement_sparsity/model_sparsity": 0.7053901580127081, "compression_loss": 78.61814880371094, "distillation_loss": 4.980585098266602, "epoch": 3.08, "learning_rate": 3.847093077862309e-05, "loss": 81.5686, "step": 3638, "task_loss": 2.7391433715820312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7358722059529639, "compression/movement_sparsity/importance_threshold": -0.0018787669276588209, "compression/movement_sparsity/linear_layer_sparsity": 0.7308584198741723, "compression/movement_sparsity/model_sparsity": 0.7057511847680101, "compression_loss": 78.65486145019531, "distillation_loss": 2.8159961700439453, "epoch": 3.08, "learning_rate": 3.8466234620080777e-05, "loss": 81.5124, "step": 3639, "task_loss": 1.0816974639892578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7362197424794796, "compression/movement_sparsity/importance_threshold": -0.0018762948662291367, "compression/movement_sparsity/linear_layer_sparsity": 0.7312199606368918, "compression/movement_sparsity/model_sparsity": 0.7061003054933006, "compression_loss": 78.69156646728516, "distillation_loss": 3.644043207168579, "epoch": 3.08, "learning_rate": 3.846153846153846e-05, "loss": 82.3203, "step": 3640, "task_loss": 2.6660280227661133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7365669740156446, "compression/movement_sparsity/importance_threshold": -0.0018738249742257579, "compression/movement_sparsity/linear_layer_sparsity": 0.7315596444003346, "compression/movement_sparsity/model_sparsity": 0.7064283200744798, "compression_loss": 78.72823333740234, "distillation_loss": 2.9136228561401367, "epoch": 3.08, "learning_rate": 3.845684230299615e-05, "loss": 81.7791, "step": 3641, "task_loss": 2.4492287635803223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7369139006953448, "compression/movement_sparsity/importance_threshold": -0.0018713572506963401, "compression/movement_sparsity/linear_layer_sparsity": 0.7320433482604833, "compression/movement_sparsity/model_sparsity": 0.7068954072189852, "compression_loss": 78.76487731933594, "distillation_loss": 1.900512933731079, "epoch": 3.08, "learning_rate": 3.8452146144453836e-05, "loss": 82.1311, "step": 3642, "task_loss": 1.8224658966064453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7372605226524647, "compression/movement_sparsity/importance_threshold": -0.001868891694688549, "compression/movement_sparsity/linear_layer_sparsity": 0.7322990262629315, "compression/movement_sparsity/model_sparsity": 0.7071423018954918, "compression_loss": 78.80146789550781, "distillation_loss": 2.8961100578308105, "epoch": 3.08, "learning_rate": 3.844744998591153e-05, "loss": 82.2554, "step": 3643, "task_loss": 1.6882318258285522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7376068400208897, "compression/movement_sparsity/importance_threshold": -0.0018664283052500447, "compression/movement_sparsity/linear_layer_sparsity": 0.7325461904096877, "compression/movement_sparsity/model_sparsity": 0.707380975193441, "compression_loss": 78.8379898071289, "distillation_loss": 3.386237621307373, "epoch": 3.08, "learning_rate": 3.8442753827369215e-05, "loss": 81.9408, "step": 3644, "task_loss": 2.269223213195801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.737952852934505, "compression/movement_sparsity/importance_threshold": -0.001863967081428491, "compression/movement_sparsity/linear_layer_sparsity": 0.7330351409035962, "compression/movement_sparsity/model_sparsity": 0.7078531287336962, "compression_loss": 78.8745346069336, "distillation_loss": 3.4214401245117188, "epoch": 3.08, "learning_rate": 3.84380576688269e-05, "loss": 81.7945, "step": 3645, "task_loss": 1.4236786365509033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7382985615271958, "compression/movement_sparsity/importance_threshold": -0.0018615080222715462, "compression/movement_sparsity/linear_layer_sparsity": 0.7333611237984254, "compression/movement_sparsity/model_sparsity": 0.7081679131132473, "compression_loss": 78.91105651855469, "distillation_loss": 3.5701985359191895, "epoch": 3.08, "learning_rate": 3.843336151028459e-05, "loss": 82.0486, "step": 3646, "task_loss": 2.040390729904175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7386439659328468, "compression/movement_sparsity/importance_threshold": -0.0018590511268268767, "compression/movement_sparsity/linear_layer_sparsity": 0.7336099692528183, "compression/movement_sparsity/model_sparsity": 0.7084082099607436, "compression_loss": 78.947509765625, "distillation_loss": 3.398574113845825, "epoch": 3.08, "learning_rate": 3.8428665351742274e-05, "loss": 81.8212, "step": 3647, "task_loss": 1.7996249198913574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7389890662853433, "compression/movement_sparsity/importance_threshold": -0.0018565963941421428, "compression/movement_sparsity/linear_layer_sparsity": 0.733985711699192, "compression/movement_sparsity/model_sparsity": 0.7087710444981654, "compression_loss": 78.98395538330078, "distillation_loss": 3.5787901878356934, "epoch": 3.08, "learning_rate": 3.842396919319997e-05, "loss": 82.8732, "step": 3648, "task_loss": 1.3110698461532593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.739333862718571, "compression/movement_sparsity/importance_threshold": -0.001854143823265003, "compression/movement_sparsity/linear_layer_sparsity": 0.7343926835406039, "compression/movement_sparsity/model_sparsity": 0.7091640356048331, "compression_loss": 79.02035522460938, "distillation_loss": 2.5387022495269775, "epoch": 3.08, "learning_rate": 3.8419273034657653e-05, "loss": 82.1106, "step": 3649, "task_loss": 1.6305869817733765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7396783553664146, "compression/movement_sparsity/importance_threshold": -0.0018516934132431218, "compression/movement_sparsity/linear_layer_sparsity": 0.7347313060531272, "compression/movement_sparsity/model_sparsity": 0.7094910253923264, "compression_loss": 79.0567398071289, "distillation_loss": 4.1226725578308105, "epoch": 3.09, "learning_rate": 3.841457687611534e-05, "loss": 82.5003, "step": 3650, "task_loss": 2.6338038444519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7400225443627593, "compression/movement_sparsity/importance_threshold": -0.0018492451631241611, "compression/movement_sparsity/linear_layer_sparsity": 0.7351253044001514, "compression/movement_sparsity/model_sparsity": 0.7098714886840497, "compression_loss": 79.09305572509766, "distillation_loss": 2.568681001663208, "epoch": 3.09, "learning_rate": 3.8409880717573026e-05, "loss": 81.9638, "step": 3651, "task_loss": 1.9989525079727173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.74036642984149, "compression/movement_sparsity/importance_threshold": -0.0018467990719557839, "compression/movement_sparsity/linear_layer_sparsity": 0.7356575276984105, "compression/movement_sparsity/model_sparsity": 0.7103854284747032, "compression_loss": 79.12940216064453, "distillation_loss": 4.945842742919922, "epoch": 3.09, "learning_rate": 3.840518455903071e-05, "loss": 83.0731, "step": 3652, "task_loss": 2.649742841720581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7407100119364922, "compression/movement_sparsity/importance_threshold": -0.0018443551387856503, "compression/movement_sparsity/linear_layer_sparsity": 0.7360241004598722, "compression/movement_sparsity/model_sparsity": 0.710739408334099, "compression_loss": 79.16572570800781, "distillation_loss": 3.7587387561798096, "epoch": 3.09, "learning_rate": 3.8400488400488406e-05, "loss": 81.8391, "step": 3653, "task_loss": 2.6968488693237305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7410532907816508, "compression/movement_sparsity/importance_threshold": -0.0018419133626614223, "compression/movement_sparsity/linear_layer_sparsity": 0.736452333092179, "compression/movement_sparsity/model_sparsity": 0.7111529298580883, "compression_loss": 79.20191955566406, "distillation_loss": 3.8144943714141846, "epoch": 3.09, "learning_rate": 3.8395792241946085e-05, "loss": 82.5356, "step": 3654, "task_loss": 3.18479323387146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7413962665108513, "compression/movement_sparsity/importance_threshold": -0.001839473742630761, "compression/movement_sparsity/linear_layer_sparsity": 0.736772067723167, "compression/movement_sparsity/model_sparsity": 0.7114616806208831, "compression_loss": 79.23815155029297, "distillation_loss": 3.1904659271240234, "epoch": 3.09, "learning_rate": 3.839109608340378e-05, "loss": 82.9102, "step": 3655, "task_loss": 1.6179560422897339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7417389392579783, "compression/movement_sparsity/importance_threshold": -0.001837036277741332, "compression/movement_sparsity/linear_layer_sparsity": 0.7371170935137107, "compression/movement_sparsity/model_sparsity": 0.7117948537140981, "compression_loss": 79.2742919921875, "distillation_loss": 5.169281005859375, "epoch": 3.09, "learning_rate": 3.8386399924861465e-05, "loss": 82.8656, "step": 3656, "task_loss": 2.73717999458313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7420813091569174, "compression/movement_sparsity/importance_threshold": -0.0018346009670407917, "compression/movement_sparsity/linear_layer_sparsity": 0.737456503021298, "compression/movement_sparsity/model_sparsity": 0.7121226034609539, "compression_loss": 79.31048583984375, "distillation_loss": 3.7399399280548096, "epoch": 3.09, "learning_rate": 3.838170376631916e-05, "loss": 83.3633, "step": 3657, "task_loss": 2.5063934326171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7424233763415533, "compression/movement_sparsity/importance_threshold": -0.0018321678095768075, "compression/movement_sparsity/linear_layer_sparsity": 0.7377217799786929, "compression/movement_sparsity/model_sparsity": 0.7123787673387751, "compression_loss": 79.34648895263672, "distillation_loss": 4.163967609405518, "epoch": 3.09, "learning_rate": 3.837700760777684e-05, "loss": 83.5682, "step": 3658, "task_loss": 2.947725772857666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7427651409457716, "compression/movement_sparsity/importance_threshold": -0.001829736804397037, "compression/movement_sparsity/linear_layer_sparsity": 0.7380635385473044, "compression/movement_sparsity/model_sparsity": 0.7127087854491825, "compression_loss": 79.3825912475586, "distillation_loss": 4.886877059936523, "epoch": 3.09, "learning_rate": 3.837231144923453e-05, "loss": 83.7119, "step": 3659, "task_loss": 2.974186420440674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7431066031034573, "compression/movement_sparsity/importance_threshold": -0.0018273079505491422, "compression/movement_sparsity/linear_layer_sparsity": 0.7384222294339589, "compression/movement_sparsity/model_sparsity": 0.713055154200418, "compression_loss": 79.41864776611328, "distillation_loss": 4.562920093536377, "epoch": 3.09, "learning_rate": 3.836761529069222e-05, "loss": 83.2498, "step": 3660, "task_loss": 2.221745252609253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7434477629484952, "compression/movement_sparsity/importance_threshold": -0.0018248812470807894, "compression/movement_sparsity/linear_layer_sparsity": 0.7388189226428687, "compression/movement_sparsity/model_sparsity": 0.7134382197772308, "compression_loss": 79.45462036132812, "distillation_loss": 4.240085124969482, "epoch": 3.09, "learning_rate": 3.83629191321499e-05, "loss": 83.1296, "step": 3661, "task_loss": 3.0163581371307373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7437886206147711, "compression/movement_sparsity/importance_threshold": -0.0018224566930396344, "compression/movement_sparsity/linear_layer_sparsity": 0.7392026542815584, "compression/movement_sparsity/model_sparsity": 0.7138087690536349, "compression_loss": 79.49066162109375, "distillation_loss": 3.620213031768799, "epoch": 3.1, "learning_rate": 3.835822297360759e-05, "loss": 82.9234, "step": 3662, "task_loss": 2.4907641410827637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7441291762361695, "compression/movement_sparsity/importance_threshold": -0.0018200342874733436, "compression/movement_sparsity/linear_layer_sparsity": 0.73964271568816, "compression/movement_sparsity/model_sparsity": 0.7142337129971325, "compression_loss": 79.52662658691406, "distillation_loss": 4.0072808265686035, "epoch": 3.1, "learning_rate": 3.8353526815065276e-05, "loss": 83.335, "step": 3663, "task_loss": 3.2153725624084473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7444694299465761, "compression/movement_sparsity/importance_threshold": -0.0018176140294295755, "compression/movement_sparsity/linear_layer_sparsity": 0.7401298060119172, "compression/movement_sparsity/model_sparsity": 0.7147040702698036, "compression_loss": 79.56251525878906, "distillation_loss": 2.7105515003204346, "epoch": 3.1, "learning_rate": 3.834883065652297e-05, "loss": 83.0828, "step": 3664, "task_loss": 3.2697606086730957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7448093818798753, "compression/movement_sparsity/importance_threshold": -0.0018151959179559963, "compression/movement_sparsity/linear_layer_sparsity": 0.7405836636804736, "compression/movement_sparsity/model_sparsity": 0.7151423365312155, "compression_loss": 79.59847259521484, "distillation_loss": 2.769216299057007, "epoch": 3.1, "learning_rate": 3.8344134497980655e-05, "loss": 82.8006, "step": 3665, "task_loss": 2.261627674102783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7451490321699531, "compression/movement_sparsity/importance_threshold": -0.0018127799521002628, "compression/movement_sparsity/linear_layer_sparsity": 0.7408291703679283, "compression/movement_sparsity/model_sparsity": 0.7153794093086893, "compression_loss": 79.63426208496094, "distillation_loss": 4.782939910888672, "epoch": 3.1, "learning_rate": 3.833943833943834e-05, "loss": 83.2139, "step": 3666, "task_loss": 1.5994651317596436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7454883809506939, "compression/movement_sparsity/importance_threshold": -0.0018103661309100414, "compression/movement_sparsity/linear_layer_sparsity": 0.7411856195110672, "compression/movement_sparsity/model_sparsity": 0.7157236133271955, "compression_loss": 79.67015075683594, "distillation_loss": 3.817155361175537, "epoch": 3.1, "learning_rate": 3.833474218089603e-05, "loss": 83.5517, "step": 3667, "task_loss": 1.9653407335281372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7458274283559831, "compression/movement_sparsity/importance_threshold": -0.0018079544534329922, "compression/movement_sparsity/linear_layer_sparsity": 0.7414551533963082, "compression/movement_sparsity/model_sparsity": 0.7159838878942952, "compression_loss": 79.70594024658203, "distillation_loss": 3.180725574493408, "epoch": 3.1, "learning_rate": 3.8330046022353714e-05, "loss": 82.8141, "step": 3668, "task_loss": 1.3083261251449585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7461661745197063, "compression/movement_sparsity/importance_threshold": -0.0018055449187167737, "compression/movement_sparsity/linear_layer_sparsity": 0.7417742560464116, "compression/movement_sparsity/model_sparsity": 0.7162920283866929, "compression_loss": 79.7417221069336, "distillation_loss": 2.4553163051605225, "epoch": 3.1, "learning_rate": 3.832534986381141e-05, "loss": 82.5562, "step": 3669, "task_loss": 1.865386724472046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7465046195757481, "compression/movement_sparsity/importance_threshold": -0.0018031375258090532, "compression/movement_sparsity/linear_layer_sparsity": 0.7420615092447594, "compression/movement_sparsity/model_sparsity": 0.7165694135539835, "compression_loss": 79.77743530273438, "distillation_loss": 2.126021385192871, "epoch": 3.1, "learning_rate": 3.8320653705269094e-05, "loss": 82.8739, "step": 3670, "task_loss": 1.2257122993469238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7468427636579938, "compression/movement_sparsity/importance_threshold": -0.0018007322737574882, "compression/movement_sparsity/linear_layer_sparsity": 0.7424084429021249, "compression/movement_sparsity/model_sparsity": 0.7169044289729256, "compression_loss": 79.81311798095703, "distillation_loss": 3.1676647663116455, "epoch": 3.1, "learning_rate": 3.831595754672678e-05, "loss": 82.708, "step": 3671, "task_loss": 1.7792091369628906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7471806069003284, "compression/movement_sparsity/importance_threshold": -0.0017983291616097442, "compression/movement_sparsity/linear_layer_sparsity": 0.7428005214581597, "compression/movement_sparsity/model_sparsity": 0.7172830384243859, "compression_loss": 79.8487548828125, "distillation_loss": 3.155740737915039, "epoch": 3.1, "learning_rate": 3.8311261388184466e-05, "loss": 82.9398, "step": 3672, "task_loss": 2.3487908840179443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7475181494366374, "compression/movement_sparsity/importance_threshold": -0.0017959281884134797, "compression/movement_sparsity/linear_layer_sparsity": 0.7431723527775488, "compression/movement_sparsity/model_sparsity": 0.717642096194067, "compression_loss": 79.8843994140625, "distillation_loss": 4.795365333557129, "epoch": 3.1, "learning_rate": 3.830656522964215e-05, "loss": 83.1864, "step": 3673, "task_loss": 2.1964311599731445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7478553914008055, "compression/movement_sparsity/importance_threshold": -0.0017935293532163601, "compression/movement_sparsity/linear_layer_sparsity": 0.7434371766165737, "compression/movement_sparsity/model_sparsity": 0.717897822519528, "compression_loss": 79.92000579833984, "distillation_loss": 3.8655307292938232, "epoch": 3.11, "learning_rate": 3.8301869071099846e-05, "loss": 83.0824, "step": 3674, "task_loss": 1.9110203981399536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7481923329267182, "compression/movement_sparsity/importance_threshold": -0.001791132655066044, "compression/movement_sparsity/linear_layer_sparsity": 0.7438396649709547, "compression/movement_sparsity/model_sparsity": 0.7182864841607369, "compression_loss": 79.95556640625, "distillation_loss": 2.904423713684082, "epoch": 3.11, "learning_rate": 3.8297172912557525e-05, "loss": 83.1957, "step": 3675, "task_loss": 1.7870663404464722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7485289741482603, "compression/movement_sparsity/importance_threshold": -0.0017887380930101957, "compression/movement_sparsity/linear_layer_sparsity": 0.7441188574319807, "compression/movement_sparsity/model_sparsity": 0.7185560855018304, "compression_loss": 79.9911117553711, "distillation_loss": 4.19931697845459, "epoch": 3.11, "learning_rate": 3.829247675401522e-05, "loss": 83.5687, "step": 3676, "task_loss": 1.8488175868988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7488653151993174, "compression/movement_sparsity/importance_threshold": -0.001786345666096473, "compression/movement_sparsity/linear_layer_sparsity": 0.7443954504244622, "compression/movement_sparsity/model_sparsity": 0.7188231766741205, "compression_loss": 80.0266342163086, "distillation_loss": 2.333275318145752, "epoch": 3.11, "learning_rate": 3.8287780595472905e-05, "loss": 83.63, "step": 3677, "task_loss": 1.781274437904358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.749201356213774, "compression/movement_sparsity/importance_threshold": -0.001783955373372544, "compression/movement_sparsity/linear_layer_sparsity": 0.7447947788744196, "compression/movement_sparsity/model_sparsity": 0.7192087869633439, "compression_loss": 80.06214904785156, "distillation_loss": 3.2634546756744385, "epoch": 3.11, "learning_rate": 3.828308443693059e-05, "loss": 82.8212, "step": 3678, "task_loss": 1.6374863386154175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7495370973255158, "compression/movement_sparsity/importance_threshold": -0.0017815672138860672, "compression/movement_sparsity/linear_layer_sparsity": 0.7451864162362519, "compression/movement_sparsity/model_sparsity": 0.7195869703769798, "compression_loss": 80.09757995605469, "distillation_loss": 2.0941784381866455, "epoch": 3.11, "learning_rate": 3.827838827838828e-05, "loss": 82.9126, "step": 3679, "task_loss": 1.2722421884536743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7498725386684278, "compression/movement_sparsity/importance_threshold": -0.0017791811866847019, "compression/movement_sparsity/linear_layer_sparsity": 0.7457016356714623, "compression/movement_sparsity/model_sparsity": 0.7200844904395903, "compression_loss": 80.13294219970703, "distillation_loss": 3.3177177906036377, "epoch": 3.11, "learning_rate": 3.8273692119845964e-05, "loss": 83.3908, "step": 3680, "task_loss": 2.698725461959839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7502076803763951, "compression/movement_sparsity/importance_threshold": -0.0017767972908161143, "compression/movement_sparsity/linear_layer_sparsity": 0.7460651677701768, "compression/movement_sparsity/model_sparsity": 0.7204355340923585, "compression_loss": 80.1683349609375, "distillation_loss": 3.577864170074463, "epoch": 3.11, "learning_rate": 3.826899596130366e-05, "loss": 83.3981, "step": 3681, "task_loss": 2.3206160068511963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7505425225833029, "compression/movement_sparsity/importance_threshold": -0.001774415525327963, "compression/movement_sparsity/linear_layer_sparsity": 0.7465022123623665, "compression/movement_sparsity/model_sparsity": 0.7208575648583, "compression_loss": 80.20368194580078, "distillation_loss": 3.854102849960327, "epoch": 3.11, "learning_rate": 3.826429980276134e-05, "loss": 83.8693, "step": 3682, "task_loss": 2.2054762840270996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.750877065423036, "compression/movement_sparsity/importance_threshold": -0.0017720358892679144, "compression/movement_sparsity/linear_layer_sparsity": 0.7469992355177645, "compression/movement_sparsity/model_sparsity": 0.721337513739288, "compression_loss": 80.2390365600586, "distillation_loss": 3.845104694366455, "epoch": 3.11, "learning_rate": 3.8259603644219036e-05, "loss": 84.0212, "step": 3683, "task_loss": 2.196621894836426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7512113090294802, "compression/movement_sparsity/importance_threshold": -0.0017696583816836234, "compression/movement_sparsity/linear_layer_sparsity": 0.7472412126515992, "compression/movement_sparsity/model_sparsity": 0.7215711782141666, "compression_loss": 80.27432250976562, "distillation_loss": 2.7986457347869873, "epoch": 3.11, "learning_rate": 3.8254907485676716e-05, "loss": 84.0814, "step": 3684, "task_loss": 1.7085652351379395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7515452535365199, "compression/movement_sparsity/importance_threshold": -0.001767283001622759, "compression/movement_sparsity/linear_layer_sparsity": 0.7476627319775269, "compression/movement_sparsity/model_sparsity": 0.7219782170545037, "compression_loss": 80.30958557128906, "distillation_loss": 3.1509742736816406, "epoch": 3.11, "learning_rate": 3.82502113271344e-05, "loss": 83.5182, "step": 3685, "task_loss": 1.650642991065979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.751878899078041, "compression/movement_sparsity/importance_threshold": -0.0017649097481329768, "compression/movement_sparsity/linear_layer_sparsity": 0.7479168479140147, "compression/movement_sparsity/model_sparsity": 0.7222236033268211, "compression_loss": 80.34476470947266, "distillation_loss": 4.156922817230225, "epoch": 3.12, "learning_rate": 3.8245515168592095e-05, "loss": 83.8542, "step": 3686, "task_loss": 2.023454189300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7522122457879278, "compression/movement_sparsity/importance_threshold": -0.0017625386202619444, "compression/movement_sparsity/linear_layer_sparsity": 0.7481856782733652, "compression/movement_sparsity/model_sparsity": 0.722483198536309, "compression_loss": 80.38001251220703, "distillation_loss": 4.359353065490723, "epoch": 3.12, "learning_rate": 3.824081901004978e-05, "loss": 84.2737, "step": 3687, "task_loss": 2.143462657928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7525452938000659, "compression/movement_sparsity/importance_threshold": -0.0017601696170573208, "compression/movement_sparsity/linear_layer_sparsity": 0.7483774427372852, "compression/movement_sparsity/model_sparsity": 0.7226683753009568, "compression_loss": 80.41522216796875, "distillation_loss": 4.215389251708984, "epoch": 3.12, "learning_rate": 3.823612285150747e-05, "loss": 84.387, "step": 3688, "task_loss": 2.3091604709625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7528780432483407, "compression/movement_sparsity/importance_threshold": -0.0017578027375667656, "compression/movement_sparsity/linear_layer_sparsity": 0.7489122416557537, "compression/movement_sparsity/model_sparsity": 0.723184802231342, "compression_loss": 80.45034790039062, "distillation_loss": 2.9663376808166504, "epoch": 3.12, "learning_rate": 3.8231426692965154e-05, "loss": 83.209, "step": 3689, "task_loss": 2.258729934692383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7532104942666369, "compression/movement_sparsity/importance_threshold": -0.001755437980837945, "compression/movement_sparsity/linear_layer_sparsity": 0.7491004407935505, "compression/movement_sparsity/model_sparsity": 0.7233665361497873, "compression_loss": 80.4854736328125, "distillation_loss": 3.517035722732544, "epoch": 3.12, "learning_rate": 3.822673053442285e-05, "loss": 84.0782, "step": 3690, "task_loss": 2.1024229526519775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.75354264698884, "compression/movement_sparsity/importance_threshold": -0.0017530753459185167, "compression/movement_sparsity/linear_layer_sparsity": 0.7494771252491674, "compression/movement_sparsity/model_sparsity": 0.7237302803355368, "compression_loss": 80.52053833007812, "distillation_loss": 4.278273582458496, "epoch": 3.12, "learning_rate": 3.8222034375880534e-05, "loss": 84.3108, "step": 3691, "task_loss": 2.9263436794281006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7538745015488346, "compression/movement_sparsity/importance_threshold": -0.001750714831856147, "compression/movement_sparsity/linear_layer_sparsity": 0.7497965021551264, "compression/movement_sparsity/model_sparsity": 0.7240386856622578, "compression_loss": 80.55548858642578, "distillation_loss": 3.486096143722534, "epoch": 3.12, "learning_rate": 3.8217338217338214e-05, "loss": 84.2318, "step": 3692, "task_loss": 2.3125672340393066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7542060580805066, "compression/movement_sparsity/importance_threshold": -0.0017483564376984933, "compression/movement_sparsity/linear_layer_sparsity": 0.7499845224304087, "compression/movement_sparsity/model_sparsity": 0.7242202468626661, "compression_loss": 80.59054565429688, "distillation_loss": 3.6499361991882324, "epoch": 3.12, "learning_rate": 3.821264205879591e-05, "loss": 84.1927, "step": 3693, "task_loss": 3.155308246612549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7545373167177403, "compression/movement_sparsity/importance_threshold": -0.0017460001624932222, "compression/movement_sparsity/linear_layer_sparsity": 0.7503934021386424, "compression/movement_sparsity/model_sparsity": 0.7246150802950609, "compression_loss": 80.62547302246094, "distillation_loss": 4.1806135177612305, "epoch": 3.12, "learning_rate": 3.820794590025359e-05, "loss": 84.1705, "step": 3694, "task_loss": 1.7564631700515747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7548682775944217, "compression/movement_sparsity/importance_threshold": -0.0017436460052879894, "compression/movement_sparsity/linear_layer_sparsity": 0.750638062210195, "compression/movement_sparsity/model_sparsity": 0.7248513355404933, "compression_loss": 80.66045379638672, "distillation_loss": 3.6107096672058105, "epoch": 3.12, "learning_rate": 3.8203249741711286e-05, "loss": 84.1477, "step": 3695, "task_loss": 3.3732805252075195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7551989408444351, "compression/movement_sparsity/importance_threshold": -0.001741293965130464, "compression/movement_sparsity/linear_layer_sparsity": 0.7509345566384608, "compression/movement_sparsity/model_sparsity": 0.7251376444730245, "compression_loss": 80.69535064697266, "distillation_loss": 3.664377212524414, "epoch": 3.12, "learning_rate": 3.819855358316897e-05, "loss": 83.9054, "step": 3696, "task_loss": 2.5417423248291016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7555293066016664, "compression/movement_sparsity/importance_threshold": -0.0017389440410683007, "compression/movement_sparsity/linear_layer_sparsity": 0.751219114974923, "compression/movement_sparsity/model_sparsity": 0.7254124273552255, "compression_loss": 80.73026275634766, "distillation_loss": 3.0544984340667725, "epoch": 3.13, "learning_rate": 3.819385742462666e-05, "loss": 84.0326, "step": 3697, "task_loss": 2.213433027267456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.755859375, "compression/movement_sparsity/importance_threshold": -0.001736596232149168, "compression/movement_sparsity/linear_layer_sparsity": 0.7515690177500297, "compression/movement_sparsity/model_sparsity": 0.7257503098935805, "compression_loss": 80.76513671875, "distillation_loss": 5.8036065101623535, "epoch": 3.13, "learning_rate": 3.8189161266084345e-05, "loss": 84.974, "step": 3698, "task_loss": 3.3272159099578857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7561891461733214, "compression/movement_sparsity/importance_threshold": -0.001734250537420724, "compression/movement_sparsity/linear_layer_sparsity": 0.7518207488529904, "compression/movement_sparsity/model_sparsity": 0.725993393258739, "compression_loss": 80.79996490478516, "distillation_loss": 3.462409734725952, "epoch": 3.13, "learning_rate": 3.818446510754203e-05, "loss": 83.9037, "step": 3699, "task_loss": 2.8909013271331787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7565186202555162, "compression/movement_sparsity/importance_threshold": -0.001731906955930629, "compression/movement_sparsity/linear_layer_sparsity": 0.7520164840647332, "compression/movement_sparsity/model_sparsity": 0.7261824043638064, "compression_loss": 80.83474731445312, "distillation_loss": 3.494741916656494, "epoch": 3.13, "learning_rate": 3.8179768948999725e-05, "loss": 84.3921, "step": 3700, "task_loss": 1.620486855506897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7568477973804688, "compression/movement_sparsity/importance_threshold": -0.0017295654867265494, "compression/movement_sparsity/linear_layer_sparsity": 0.7523833191578828, "compression/movement_sparsity/model_sparsity": 0.7265366375429897, "compression_loss": 80.8695068359375, "distillation_loss": 3.370675802230835, "epoch": 3.13, "learning_rate": 3.8175072790457404e-05, "loss": 84.6961, "step": 3701, "task_loss": 2.898996353149414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7571766776820649, "compression/movement_sparsity/importance_threshold": -0.001727226128856142, "compression/movement_sparsity/linear_layer_sparsity": 0.7527571895099378, "compression/movement_sparsity/model_sparsity": 0.7268976642982917, "compression_loss": 80.9041976928711, "distillation_loss": 3.230741500854492, "epoch": 3.13, "learning_rate": 3.81703766319151e-05, "loss": 83.9915, "step": 3702, "task_loss": 2.500800609588623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.757505261294189, "compression/movement_sparsity/importance_threshold": -0.0017248888813670739, "compression/movement_sparsity/linear_layer_sparsity": 0.7531977040349094, "compression/movement_sparsity/model_sparsity": 0.7273230457941494, "compression_loss": 80.9388656616211, "distillation_loss": 3.603447675704956, "epoch": 3.13, "learning_rate": 3.8165680473372784e-05, "loss": 84.2546, "step": 3703, "task_loss": 1.6398744583129883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.757833548350727, "compression/movement_sparsity/importance_threshold": -0.0017225537433070027, "compression/movement_sparsity/linear_layer_sparsity": 0.7535055025740939, "compression/movement_sparsity/model_sparsity": 0.7276202705066139, "compression_loss": 80.97348022460938, "distillation_loss": 3.7973155975341797, "epoch": 3.13, "learning_rate": 3.816098431483047e-05, "loss": 84.622, "step": 3704, "task_loss": 1.6398524045944214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7581615389855635, "compression/movement_sparsity/importance_threshold": -0.0017202207137235921, "compression/movement_sparsity/linear_layer_sparsity": 0.753880839598768, "compression/movement_sparsity/model_sparsity": 0.7279827135498187, "compression_loss": 81.0080795288086, "distillation_loss": 3.1367101669311523, "epoch": 3.13, "learning_rate": 3.8156288156288156e-05, "loss": 84.76, "step": 3705, "task_loss": 2.2595574855804443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.758489233332584, "compression/movement_sparsity/importance_threshold": -0.0017178897916645024, "compression/movement_sparsity/linear_layer_sparsity": 0.7542178165761575, "compression/movement_sparsity/model_sparsity": 0.7283081143313724, "compression_loss": 81.04261779785156, "distillation_loss": 4.8834710121154785, "epoch": 3.13, "learning_rate": 3.815159199774584e-05, "loss": 84.9586, "step": 3706, "task_loss": 2.3884599208831787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7588166315256731, "compression/movement_sparsity/importance_threshold": -0.0017155609761773999, "compression/movement_sparsity/linear_layer_sparsity": 0.7545884197062801, "compression/movement_sparsity/model_sparsity": 0.7286659861038667, "compression_loss": 81.0771713256836, "distillation_loss": 3.587491035461426, "epoch": 3.13, "learning_rate": 3.8146895839203536e-05, "loss": 84.2994, "step": 3707, "task_loss": 2.987438917160034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7591437336987167, "compression/movement_sparsity/importance_threshold": -0.0017132342663099404, "compression/movement_sparsity/linear_layer_sparsity": 0.7549149391886529, "compression/movement_sparsity/model_sparsity": 0.7289812886375286, "compression_loss": 81.11162567138672, "distillation_loss": 2.6389920711517334, "epoch": 3.13, "learning_rate": 3.814219968066122e-05, "loss": 84.3152, "step": 3708, "task_loss": 0.704338014125824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7594705399855991, "compression/movement_sparsity/importance_threshold": -0.001710909661109791, "compression/movement_sparsity/linear_layer_sparsity": 0.7553373051304828, "compression/movement_sparsity/model_sparsity": 0.729389145009907, "compression_loss": 81.14608764648438, "distillation_loss": 3.2328598499298096, "epoch": 3.14, "learning_rate": 3.813750352211891e-05, "loss": 84.7142, "step": 3709, "task_loss": 1.9908359050750732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.759797050520206, "compression/movement_sparsity/importance_threshold": -0.0017085871596246105, "compression/movement_sparsity/linear_layer_sparsity": 0.7556125745403567, "compression/movement_sparsity/model_sparsity": 0.729654958068724, "compression_loss": 81.18049621582031, "distillation_loss": 4.0685715675354, "epoch": 3.14, "learning_rate": 3.8132807363576595e-05, "loss": 84.5475, "step": 3710, "task_loss": 1.8876299858093262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7601232654364226, "compression/movement_sparsity/importance_threshold": -0.0017062667609020607, "compression/movement_sparsity/linear_layer_sparsity": 0.7559872318874755, "compression/movement_sparsity/model_sparsity": 0.7300167447833885, "compression_loss": 81.21489715576172, "distillation_loss": 2.992877721786499, "epoch": 3.14, "learning_rate": 3.812811120503428e-05, "loss": 84.7944, "step": 3711, "task_loss": 2.642707109451294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7604491848681336, "compression/movement_sparsity/importance_threshold": -0.0017039484639898052, "compression/movement_sparsity/linear_layer_sparsity": 0.7564096932226465, "compression/movement_sparsity/model_sparsity": 0.7304246932720533, "compression_loss": 81.24922943115234, "distillation_loss": 4.781485080718994, "epoch": 3.14, "learning_rate": 3.8123415046491974e-05, "loss": 84.8305, "step": 3712, "task_loss": 2.0399160385131836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7607748089492248, "compression/movement_sparsity/importance_threshold": -0.0017016322679355026, "compression/movement_sparsity/linear_layer_sparsity": 0.7566910201096795, "compression/movement_sparsity/model_sparsity": 0.7306963557150539, "compression_loss": 81.28347778320312, "distillation_loss": 4.5113630294799805, "epoch": 3.14, "learning_rate": 3.811871888794966e-05, "loss": 84.7369, "step": 3713, "task_loss": 3.0370616912841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7611001378135805, "compression/movement_sparsity/importance_threshold": -0.0016993181717868192, "compression/movement_sparsity/linear_layer_sparsity": 0.7570260176752414, "compression/movement_sparsity/model_sparsity": 0.7310198450836657, "compression_loss": 81.31782531738281, "distillation_loss": 4.1950459480285645, "epoch": 3.14, "learning_rate": 3.811402272940735e-05, "loss": 84.7956, "step": 3714, "task_loss": 1.8890365362167358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7614251715950866, "compression/movement_sparsity/importance_threshold": -0.0016970061745914134, "compression/movement_sparsity/linear_layer_sparsity": 0.7572710116234879, "compression/movement_sparsity/model_sparsity": 0.7312564227361004, "compression_loss": 81.3520736694336, "distillation_loss": 3.513200283050537, "epoch": 3.14, "learning_rate": 3.810932657086503e-05, "loss": 85.0875, "step": 3715, "task_loss": 1.3557666540145874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7617499104276276, "compression/movement_sparsity/importance_threshold": -0.0016946962753969508, "compression/movement_sparsity/linear_layer_sparsity": 0.7576018476545449, "compression/movement_sparsity/model_sparsity": 0.7315758935317199, "compression_loss": 81.38632202148438, "distillation_loss": 3.6532046794891357, "epoch": 3.14, "learning_rate": 3.810463041232272e-05, "loss": 84.5498, "step": 3716, "task_loss": 2.094719409942627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7620743544450893, "compression/movement_sparsity/importance_threshold": -0.001692388473251088, "compression/movement_sparsity/linear_layer_sparsity": 0.7579716995621064, "compression/movement_sparsity/model_sparsity": 0.7319330398884591, "compression_loss": 81.42049407958984, "distillation_loss": 4.121281623840332, "epoch": 3.14, "learning_rate": 3.809993425378041e-05, "loss": 85.1911, "step": 3717, "task_loss": 2.840620994567871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7623985037813562, "compression/movement_sparsity/importance_threshold": -0.0016900827672014914, "compression/movement_sparsity/linear_layer_sparsity": 0.75822477809601, "compression/movement_sparsity/model_sparsity": 0.7321774243961625, "compression_loss": 81.45464324951172, "distillation_loss": 3.556492567062378, "epoch": 3.14, "learning_rate": 3.809523809523809e-05, "loss": 85.124, "step": 3718, "task_loss": 2.001060962677002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7627223585703138, "compression/movement_sparsity/importance_threshold": -0.0016877791562958213, "compression/movement_sparsity/linear_layer_sparsity": 0.7585774949746787, "compression/movement_sparsity/model_sparsity": 0.732518024364965, "compression_loss": 81.4887466430664, "distillation_loss": 3.9650063514709473, "epoch": 3.14, "learning_rate": 3.8090541936695785e-05, "loss": 85.4726, "step": 3719, "task_loss": 2.1661460399627686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7630459189458474, "compression/movement_sparsity/importance_threshold": -0.0016854776395817369, "compression/movement_sparsity/linear_layer_sparsity": 0.7589320839476664, "compression/movement_sparsity/model_sparsity": 0.7328604321158873, "compression_loss": 81.52286529541016, "distillation_loss": 2.8367667198181152, "epoch": 3.14, "learning_rate": 3.808584577815347e-05, "loss": 84.6481, "step": 3720, "task_loss": 1.4460798501968384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7633691850418416, "compression/movement_sparsity/importance_threshold": -0.0016831782161069054, "compression/movement_sparsity/linear_layer_sparsity": 0.7593019239310603, "compression/movement_sparsity/model_sparsity": 0.7332175669580907, "compression_loss": 81.55697631835938, "distillation_loss": 3.917475938796997, "epoch": 3.15, "learning_rate": 3.8081149619611165e-05, "loss": 85.1569, "step": 3721, "task_loss": 2.3046021461486816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7636921569921822, "compression/movement_sparsity/importance_threshold": -0.0016808808849189828, "compression/movement_sparsity/linear_layer_sparsity": 0.7595411227338358, "compression/movement_sparsity/model_sparsity": 0.7334485485461292, "compression_loss": 81.59103393554688, "distillation_loss": 2.6403896808624268, "epoch": 3.15, "learning_rate": 3.8076453461068844e-05, "loss": 84.4035, "step": 3722, "task_loss": 2.556107759475708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7640148349307536, "compression/movement_sparsity/importance_threshold": -0.001678585645065637, "compression/movement_sparsity/linear_layer_sparsity": 0.7599231849890564, "compression/movement_sparsity/model_sparsity": 0.7338174857875222, "compression_loss": 81.62503051757812, "distillation_loss": 2.5130982398986816, "epoch": 3.15, "learning_rate": 3.807175730252653e-05, "loss": 84.5074, "step": 3723, "task_loss": 1.1499379873275757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7643372189914417, "compression/movement_sparsity/importance_threshold": -0.001676292495594524, "compression/movement_sparsity/linear_layer_sparsity": 0.760157566428107, "compression/movement_sparsity/model_sparsity": 0.7340438155030996, "compression_loss": 81.65894317626953, "distillation_loss": 4.13726806640625, "epoch": 3.15, "learning_rate": 3.8067061143984224e-05, "loss": 85.7924, "step": 3724, "task_loss": 2.11171555519104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7646593093081311, "compression/movement_sparsity/importance_threshold": -0.00167400143555331, "compression/movement_sparsity/linear_layer_sparsity": 0.7604550505622865, "compression/movement_sparsity/model_sparsity": 0.7343310801421018, "compression_loss": 81.69292449951172, "distillation_loss": 2.574204444885254, "epoch": 3.15, "learning_rate": 3.806236498544191e-05, "loss": 85.2989, "step": 3725, "task_loss": 1.7083686590194702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7649811060147073, "compression/movement_sparsity/importance_threshold": -0.0016717124639896537, "compression/movement_sparsity/linear_layer_sparsity": 0.7608336548088928, "compression/movement_sparsity/model_sparsity": 0.7346966781681142, "compression_loss": 81.72682189941406, "distillation_loss": 5.347417831420898, "epoch": 3.15, "learning_rate": 3.80576688268996e-05, "loss": 85.7211, "step": 3726, "task_loss": 2.96580171585083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.765302609245055, "compression/movement_sparsity/importance_threshold": -0.0016694255799512203, "compression/movement_sparsity/linear_layer_sparsity": 0.7611189762920837, "compression/movement_sparsity/model_sparsity": 0.7349721979806061, "compression_loss": 81.76077270507812, "distillation_loss": 2.876391887664795, "epoch": 3.15, "learning_rate": 3.805297266835728e-05, "loss": 84.9774, "step": 3727, "task_loss": 0.9849065542221069 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7656238191330595, "compression/movement_sparsity/importance_threshold": -0.001667140782485671, "compression/movement_sparsity/linear_layer_sparsity": 0.7614222674959019, "compression/movement_sparsity/model_sparsity": 0.7352650701985401, "compression_loss": 81.79466247558594, "distillation_loss": 2.28379487991333, "epoch": 3.15, "learning_rate": 3.8048276509814976e-05, "loss": 85.2204, "step": 3728, "task_loss": 1.8168145418167114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7659447358126066, "compression/movement_sparsity/importance_threshold": -0.0016648580706406626, "compression/movement_sparsity/linear_layer_sparsity": 0.76160223895803, "compression/movement_sparsity/model_sparsity": 0.7354388590872872, "compression_loss": 81.8284912109375, "distillation_loss": 2.8096814155578613, "epoch": 3.15, "learning_rate": 3.804358035127266e-05, "loss": 85.242, "step": 3729, "task_loss": 1.8075639009475708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7662653594175803, "compression/movement_sparsity/importance_threshold": -0.0016625774434638639, "compression/movement_sparsity/linear_layer_sparsity": 0.7617260833630961, "compression/movement_sparsity/model_sparsity": 0.7355584490560493, "compression_loss": 81.86229705810547, "distillation_loss": 3.2911016941070557, "epoch": 3.15, "learning_rate": 3.803888419273035e-05, "loss": 84.8796, "step": 3730, "task_loss": 2.3593766689300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7665856900818668, "compression/movement_sparsity/importance_threshold": -0.0016602989000029317, "compression/movement_sparsity/linear_layer_sparsity": 0.7619747976516449, "compression/movement_sparsity/model_sparsity": 0.7357986192436519, "compression_loss": 81.89608764648438, "distillation_loss": 4.3141255378723145, "epoch": 3.15, "learning_rate": 3.8034188034188035e-05, "loss": 86.0581, "step": 3731, "task_loss": 2.17014479637146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7669057279393504, "compression/movement_sparsity/importance_threshold": -0.0016580224393055324, "compression/movement_sparsity/linear_layer_sparsity": 0.7622077958872496, "compression/movement_sparsity/model_sparsity": 0.7360236132730772, "compression_loss": 81.92984771728516, "distillation_loss": 4.341241836547852, "epoch": 3.15, "learning_rate": 3.802949187564572e-05, "loss": 85.5641, "step": 3732, "task_loss": 1.946496605873108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.767225473123917, "compression/movement_sparsity/importance_threshold": -0.0016557480604193217, "compression/movement_sparsity/linear_layer_sparsity": 0.7624712722953316, "compression/movement_sparsity/model_sparsity": 0.7362780384559934, "compression_loss": 81.96355438232422, "distillation_loss": 3.10322642326355, "epoch": 3.16, "learning_rate": 3.8024795717103414e-05, "loss": 84.8831, "step": 3733, "task_loss": 1.5953773260116577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7675449257694509, "compression/movement_sparsity/importance_threshold": -0.0016534757623919687, "compression/movement_sparsity/linear_layer_sparsity": 0.7627380874703518, "compression/movement_sparsity/model_sparsity": 0.736535687708932, "compression_loss": 81.99726867675781, "distillation_loss": 4.254807949066162, "epoch": 3.16, "learning_rate": 3.80200995585611e-05, "loss": 85.5881, "step": 3734, "task_loss": 3.2234244346618652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.767864086009838, "compression/movement_sparsity/importance_threshold": -0.0016512055442711292, "compression/movement_sparsity/linear_layer_sparsity": 0.7631509617747435, "compression/movement_sparsity/model_sparsity": 0.736934378510818, "compression_loss": 82.0309066772461, "distillation_loss": 4.771909713745117, "epoch": 3.16, "learning_rate": 3.801540340001879e-05, "loss": 85.258, "step": 3735, "task_loss": 2.4560282230377197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.768182953978963, "compression/movement_sparsity/importance_threshold": -0.0016489374051044687, "compression/movement_sparsity/linear_layer_sparsity": 0.7634941035468007, "compression/movement_sparsity/model_sparsity": 0.7372657323073775, "compression_loss": 82.06455993652344, "distillation_loss": 3.2031946182250977, "epoch": 3.16, "learning_rate": 3.8010707241476473e-05, "loss": 85.4798, "step": 3736, "task_loss": 2.215200424194336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7685015298107113, "compression/movement_sparsity/importance_threshold": -0.0016466713439396465, "compression/movement_sparsity/linear_layer_sparsity": 0.7637781252957193, "compression/movement_sparsity/model_sparsity": 0.7375399970354676, "compression_loss": 82.0981674194336, "distillation_loss": 3.0737464427948, "epoch": 3.16, "learning_rate": 3.800601108293416e-05, "loss": 85.4959, "step": 3737, "task_loss": 2.2181127071380615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7688198136389677, "compression/movement_sparsity/importance_threshold": -0.001644407359824327, "compression/movement_sparsity/linear_layer_sparsity": 0.7640590944577232, "compression/movement_sparsity/model_sparsity": 0.7378113140423944, "compression_loss": 82.13170623779297, "distillation_loss": 3.3199119567871094, "epoch": 3.16, "learning_rate": 3.800131492439185e-05, "loss": 85.5531, "step": 3738, "task_loss": 2.5599024295806885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7691378055976175, "compression/movement_sparsity/importance_threshold": -0.0016421454518061717, "compression/movement_sparsity/linear_layer_sparsity": 0.7642557955270444, "compression/movement_sparsity/model_sparsity": 0.7380012578248611, "compression_loss": 82.16523742675781, "distillation_loss": 2.0669779777526855, "epoch": 3.16, "learning_rate": 3.799661876584953e-05, "loss": 85.0019, "step": 3739, "task_loss": 1.7957483530044556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7694555058205462, "compression/movement_sparsity/importance_threshold": -0.0016398856189328388, "compression/movement_sparsity/linear_layer_sparsity": 0.7645895291308369, "compression/movement_sparsity/model_sparsity": 0.7383235266526788, "compression_loss": 82.19873046875, "distillation_loss": 5.237563133239746, "epoch": 3.16, "learning_rate": 3.7991922607307226e-05, "loss": 86.1539, "step": 3740, "task_loss": 4.006217002868652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7697729144416383, "compression/movement_sparsity/importance_threshold": -0.0016376278602519964, "compression/movement_sparsity/linear_layer_sparsity": 0.7648524570272077, "compression/movement_sparsity/model_sparsity": 0.7385774221669483, "compression_loss": 82.23220825195312, "distillation_loss": 3.00445556640625, "epoch": 3.16, "learning_rate": 3.798722644876491e-05, "loss": 85.7181, "step": 3741, "task_loss": 1.352960467338562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7700900315947796, "compression/movement_sparsity/importance_threshold": -0.0016353721748112995, "compression/movement_sparsity/linear_layer_sparsity": 0.7651924627431768, "compression/movement_sparsity/model_sparsity": 0.7389057476405939, "compression_loss": 82.26568603515625, "distillation_loss": 4.299252510070801, "epoch": 3.16, "learning_rate": 3.79825302902226e-05, "loss": 85.6853, "step": 3742, "task_loss": 2.3323781490325928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7704068574138547, "compression/movement_sparsity/importance_threshold": -0.0016331185616584153, "compression/movement_sparsity/linear_layer_sparsity": 0.7655401953197738, "compression/movement_sparsity/model_sparsity": 0.7392415345334343, "compression_loss": 82.29906463623047, "distillation_loss": 4.221470832824707, "epoch": 3.16, "learning_rate": 3.797783413168029e-05, "loss": 86.0414, "step": 3743, "task_loss": 2.1598072052001953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7707233920327491, "compression/movement_sparsity/importance_threshold": -0.0016308670198410032, "compression/movement_sparsity/linear_layer_sparsity": 0.7656775617309389, "compression/movement_sparsity/model_sparsity": 0.7393741819857874, "compression_loss": 82.33245086669922, "distillation_loss": 3.872781753540039, "epoch": 3.16, "learning_rate": 3.797313797313797e-05, "loss": 85.9645, "step": 3744, "task_loss": 2.6903269290924072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7710396355853476, "compression/movement_sparsity/importance_threshold": -0.001628617548406726, "compression/movement_sparsity/linear_layer_sparsity": 0.7661312524611482, "compression/movement_sparsity/model_sparsity": 0.7398122870436981, "compression_loss": 82.36577606201172, "distillation_loss": 5.149214744567871, "epoch": 3.17, "learning_rate": 3.7968441814595664e-05, "loss": 86.2293, "step": 3745, "task_loss": 2.8685309886932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7713555882055358, "compression/movement_sparsity/importance_threshold": -0.001626370146403243, "compression/movement_sparsity/linear_layer_sparsity": 0.7664728321672452, "compression/movement_sparsity/model_sparsity": 0.7401421324360685, "compression_loss": 82.39913940429688, "distillation_loss": 3.2948648929595947, "epoch": 3.17, "learning_rate": 3.796374565605335e-05, "loss": 85.5033, "step": 3746, "task_loss": 2.4363298416137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7716712500271983, "compression/movement_sparsity/importance_threshold": -0.0016241248128782207, "compression/movement_sparsity/linear_layer_sparsity": 0.7668347545033288, "compression/movement_sparsity/model_sparsity": 0.7404916216265044, "compression_loss": 82.43234252929688, "distillation_loss": 2.370687484741211, "epoch": 3.17, "learning_rate": 3.795904949751104e-05, "loss": 86.1328, "step": 3747, "task_loss": 1.7061151266098022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7719866211842208, "compression/movement_sparsity/importance_threshold": -0.0016218815468793165, "compression/movement_sparsity/linear_layer_sparsity": 0.7671009973183024, "compression/movement_sparsity/model_sparsity": 0.7407487181817249, "compression_loss": 82.46556091308594, "distillation_loss": 3.172405242919922, "epoch": 3.17, "learning_rate": 3.795435333896872e-05, "loss": 85.7905, "step": 3748, "task_loss": 2.187551975250244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7723017018104879, "compression/movement_sparsity/importance_threshold": -0.001619640347454196, "compression/movement_sparsity/linear_layer_sparsity": 0.7674640882228144, "compression/movement_sparsity/model_sparsity": 0.7410993357966686, "compression_loss": 82.49880981445312, "distillation_loss": 4.819858551025391, "epoch": 3.17, "learning_rate": 3.794965718042641e-05, "loss": 86.1583, "step": 3749, "task_loss": 2.943607807159424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7726164920398849, "compression/movement_sparsity/importance_threshold": -0.0016174012136505184, "compression/movement_sparsity/linear_layer_sparsity": 0.7678793712090686, "compression/movement_sparsity/model_sparsity": 0.7415003525347852, "compression_loss": 82.53202819824219, "distillation_loss": 4.691766738891602, "epoch": 3.17, "learning_rate": 3.79449610218841e-05, "loss": 86.5314, "step": 3750, "task_loss": 1.4910677671432495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7729309920062974, "compression/movement_sparsity/importance_threshold": -0.0016151641445159449, "compression/movement_sparsity/linear_layer_sparsity": 0.7682913869733906, "compression/movement_sparsity/model_sparsity": 0.741898214290094, "compression_loss": 82.56513214111328, "distillation_loss": 3.3013792037963867, "epoch": 3.17, "learning_rate": 3.794026486334179e-05, "loss": 85.8736, "step": 3751, "task_loss": 2.02673602104187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7732452018436099, "compression/movement_sparsity/importance_threshold": -0.001612929139098141, "compression/movement_sparsity/linear_layer_sparsity": 0.7684979612535143, "compression/movement_sparsity/model_sparsity": 0.7420976921081985, "compression_loss": 82.59825134277344, "distillation_loss": 4.060705661773682, "epoch": 3.17, "learning_rate": 3.7935568704799475e-05, "loss": 86.1419, "step": 3752, "task_loss": 2.335036039352417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7735591216857081, "compression/movement_sparsity/importance_threshold": -0.001610696196444765, "compression/movement_sparsity/linear_layer_sparsity": 0.7687978063728858, "compression/movement_sparsity/model_sparsity": 0.742387236625288, "compression_loss": 82.6313247680664, "distillation_loss": 3.783721685409546, "epoch": 3.17, "learning_rate": 3.793087254625716e-05, "loss": 86.659, "step": 3753, "task_loss": 1.7074366807937622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7738727516664765, "compression/movement_sparsity/importance_threshold": -0.0016084653156034807, "compression/movement_sparsity/linear_layer_sparsity": 0.7692516401931067, "compression/movement_sparsity/model_sparsity": 0.7428254798576283, "compression_loss": 82.66436767578125, "distillation_loss": 3.121267557144165, "epoch": 3.17, "learning_rate": 3.792617638771485e-05, "loss": 86.0574, "step": 3754, "task_loss": 1.1097837686538696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7741860919198009, "compression/movement_sparsity/importance_threshold": -0.0016062364956219485, "compression/movement_sparsity/linear_layer_sparsity": 0.7693559766599205, "compression/movement_sparsity/model_sparsity": 0.7429262320458305, "compression_loss": 82.6974105834961, "distillation_loss": 3.7139809131622314, "epoch": 3.17, "learning_rate": 3.792148022917254e-05, "loss": 86.5931, "step": 3755, "task_loss": 2.1565589904785156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.774499142579566, "compression/movement_sparsity/importance_threshold": -0.0016040097355478318, "compression/movement_sparsity/linear_layer_sparsity": 0.7695922063449545, "compression/movement_sparsity/model_sparsity": 0.7431543465144562, "compression_loss": 82.73040771484375, "distillation_loss": 3.245901107788086, "epoch": 3.17, "learning_rate": 3.791678407063022e-05, "loss": 85.7282, "step": 3756, "task_loss": 2.0938966274261475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7748119037796571, "compression/movement_sparsity/importance_threshold": -0.001601785034428792, "compression/movement_sparsity/linear_layer_sparsity": 0.7697426535680162, "compression/movement_sparsity/model_sparsity": 0.7432996254125759, "compression_loss": 82.76336669921875, "distillation_loss": 3.0150980949401855, "epoch": 3.18, "learning_rate": 3.7912087912087914e-05, "loss": 86.465, "step": 3757, "task_loss": 1.6477210521697998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7751243756539593, "compression/movement_sparsity/importance_threshold": -0.00159956239131249, "compression/movement_sparsity/linear_layer_sparsity": 0.7700432022132782, "compression/movement_sparsity/model_sparsity": 0.7435898492872773, "compression_loss": 82.79627990722656, "distillation_loss": 4.018149375915527, "epoch": 3.18, "learning_rate": 3.79073917535456e-05, "loss": 86.5791, "step": 3758, "task_loss": 1.7445772886276245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7754365583363576, "compression/movement_sparsity/importance_threshold": -0.0015973418052465903, "compression/movement_sparsity/linear_layer_sparsity": 0.7702992260165877, "compression/movement_sparsity/model_sparsity": 0.7438370778853218, "compression_loss": 82.82923889160156, "distillation_loss": 4.584939002990723, "epoch": 3.18, "learning_rate": 3.790269559500329e-05, "loss": 86.6273, "step": 3759, "task_loss": 2.0394046306610107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7757484519607377, "compression/movement_sparsity/importance_threshold": -0.0015951232752787498, "compression/movement_sparsity/linear_layer_sparsity": 0.7704945558066308, "compression/movement_sparsity/model_sparsity": 0.7440256974961722, "compression_loss": 82.862060546875, "distillation_loss": 3.8686866760253906, "epoch": 3.18, "learning_rate": 3.789799943646098e-05, "loss": 86.5256, "step": 3760, "task_loss": 2.292780876159668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.776060056660984, "compression/movement_sparsity/importance_threshold": -0.0015929068004566357, "compression/movement_sparsity/linear_layer_sparsity": 0.7707459530328977, "compression/movement_sparsity/model_sparsity": 0.7442684584543285, "compression_loss": 82.89492797851562, "distillation_loss": 3.681800365447998, "epoch": 3.18, "learning_rate": 3.7893303277918666e-05, "loss": 87.0874, "step": 3761, "task_loss": 1.5389683246612549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7763713725709822, "compression/movement_sparsity/importance_threshold": -0.0015906923798279056, "compression/movement_sparsity/linear_layer_sparsity": 0.7709345575923942, "compression/movement_sparsity/model_sparsity": 0.7444505838669908, "compression_loss": 82.92780303955078, "distillation_loss": 2.937018632888794, "epoch": 3.18, "learning_rate": 3.788860711937635e-05, "loss": 86.8643, "step": 3762, "task_loss": 1.996913194656372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7766823998246171, "compression/movement_sparsity/importance_threshold": -0.0015884800124402248, "compression/movement_sparsity/linear_layer_sparsity": 0.7712019808999637, "compression/movement_sparsity/model_sparsity": 0.744708820361255, "compression_loss": 82.96063995361328, "distillation_loss": 2.909369468688965, "epoch": 3.18, "learning_rate": 3.788391096083404e-05, "loss": 85.8918, "step": 3763, "task_loss": 1.9325690269470215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7769931385557741, "compression/movement_sparsity/importance_threshold": -0.0015862696973412528, "compression/movement_sparsity/linear_layer_sparsity": 0.7715168981672268, "compression/movement_sparsity/model_sparsity": 0.7450129192515887, "compression_loss": 82.99341583251953, "distillation_loss": 2.8570542335510254, "epoch": 3.18, "learning_rate": 3.787921480229173e-05, "loss": 86.278, "step": 3764, "task_loss": 1.5680291652679443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7773035888983381, "compression/movement_sparsity/importance_threshold": -0.0015840614335786525, "compression/movement_sparsity/linear_layer_sparsity": 0.7716780413686579, "compression/movement_sparsity/model_sparsity": 0.7451685266883162, "compression_loss": 83.02616882324219, "distillation_loss": 5.572265148162842, "epoch": 3.18, "learning_rate": 3.787451864374941e-05, "loss": 87.01, "step": 3765, "task_loss": 3.4366517066955566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7776137509861945, "compression/movement_sparsity/importance_threshold": -0.0015818552202000848, "compression/movement_sparsity/linear_layer_sparsity": 0.7720839400349826, "compression/movement_sparsity/model_sparsity": 0.7455604814867623, "compression_loss": 83.05887603759766, "distillation_loss": 4.592160224914551, "epoch": 3.18, "learning_rate": 3.7869822485207104e-05, "loss": 86.444, "step": 3766, "task_loss": 3.680501937866211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.777923624953228, "compression/movement_sparsity/importance_threshold": -0.0015796510562532135, "compression/movement_sparsity/linear_layer_sparsity": 0.772430253635631, "compression/movement_sparsity/model_sparsity": 0.7458948981498432, "compression_loss": 83.09147644042969, "distillation_loss": 3.987323760986328, "epoch": 3.18, "learning_rate": 3.786512632666479e-05, "loss": 87.0949, "step": 3767, "task_loss": 2.0656306743621826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7782332109333243, "compression/movement_sparsity/importance_threshold": -0.001577448940785698, "compression/movement_sparsity/linear_layer_sparsity": 0.7726652909039015, "compression/movement_sparsity/model_sparsity": 0.7461218611648894, "compression_loss": 83.12415313720703, "distillation_loss": 4.308029651641846, "epoch": 3.19, "learning_rate": 3.786043016812248e-05, "loss": 87.7146, "step": 3768, "task_loss": 3.6343369483947754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.778542509060368, "compression/movement_sparsity/importance_threshold": -0.0015752488728452028, "compression/movement_sparsity/linear_layer_sparsity": 0.7729251900616928, "compression/movement_sparsity/model_sparsity": 0.7463728319870673, "compression_loss": 83.15674591064453, "distillation_loss": 4.713467121124268, "epoch": 3.19, "learning_rate": 3.785573400958016e-05, "loss": 87.0005, "step": 3769, "task_loss": 1.8891173601150513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7788515194682446, "compression/movement_sparsity/importance_threshold": -0.0015730508514793882, "compression/movement_sparsity/linear_layer_sparsity": 0.7731310965884288, "compression/movement_sparsity/model_sparsity": 0.7465716649911673, "compression_loss": 83.18929290771484, "distillation_loss": 3.2644171714782715, "epoch": 3.19, "learning_rate": 3.785103785103785e-05, "loss": 86.8815, "step": 3770, "task_loss": 1.2333226203918457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7791602422908394, "compression/movement_sparsity/importance_threshold": -0.0015708548757359143, "compression/movement_sparsity/linear_layer_sparsity": 0.773475848123117, "compression/movement_sparsity/model_sparsity": 0.7469045732500591, "compression_loss": 83.2218246459961, "distillation_loss": 3.995328903198242, "epoch": 3.19, "learning_rate": 3.784634169249554e-05, "loss": 87.3773, "step": 3771, "task_loss": 3.238503932952881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.779468677662037, "compression/movement_sparsity/importance_threshold": -0.0015686609446624467, "compression/movement_sparsity/linear_layer_sparsity": 0.7737225353031677, "compression/movement_sparsity/model_sparsity": 0.7471427859665766, "compression_loss": 83.25433349609375, "distillation_loss": 2.448967695236206, "epoch": 3.19, "learning_rate": 3.784164553395323e-05, "loss": 86.8328, "step": 3772, "task_loss": 1.3854150772094727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.779776825715723, "compression/movement_sparsity/importance_threshold": -0.0015664690573066438, "compression/movement_sparsity/linear_layer_sparsity": 0.7740250275877543, "compression/movement_sparsity/model_sparsity": 0.7474348867106125, "compression_loss": 83.28678894042969, "distillation_loss": 3.930248260498047, "epoch": 3.19, "learning_rate": 3.7836949375410915e-05, "loss": 88.0036, "step": 3773, "task_loss": 2.3609800338745117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7800846865857822, "compression/movement_sparsity/importance_threshold": -0.001564279212716171, "compression/movement_sparsity/linear_layer_sparsity": 0.7743370711306171, "compression/movement_sparsity/model_sparsity": 0.7477362105978198, "compression_loss": 83.31926727294922, "distillation_loss": 3.1758885383605957, "epoch": 3.19, "learning_rate": 3.78322532168686e-05, "loss": 86.9492, "step": 3774, "task_loss": 2.0812652111053467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7803922604061002, "compression/movement_sparsity/importance_threshold": -0.0015620914099386861, "compression/movement_sparsity/linear_layer_sparsity": 0.77466797870668, "compression/movement_sparsity/model_sparsity": 0.7480557504806541, "compression_loss": 83.35174560546875, "distillation_loss": 3.681088447570801, "epoch": 3.19, "learning_rate": 3.782755705832629e-05, "loss": 87.2559, "step": 3775, "task_loss": 1.9181817770004272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7806995473105616, "compression/movement_sparsity/importance_threshold": -0.0015599056480218544, "compression/movement_sparsity/linear_layer_sparsity": 0.7748734678875489, "compression/movement_sparsity/model_sparsity": 0.7482541804760015, "compression_loss": 83.38412475585938, "distillation_loss": 4.61586332321167, "epoch": 3.19, "learning_rate": 3.782286089978398e-05, "loss": 87.1899, "step": 3776, "task_loss": 1.8805286884307861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7810065474330519, "compression/movement_sparsity/importance_threshold": -0.001557721926013336, "compression/movement_sparsity/linear_layer_sparsity": 0.7751282992740949, "compression/movement_sparsity/model_sparsity": 0.7485002576204666, "compression_loss": 83.41651916503906, "distillation_loss": 4.8251237869262695, "epoch": 3.19, "learning_rate": 3.781816474124167e-05, "loss": 87.7189, "step": 3777, "task_loss": 3.447066307067871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.781313260907456, "compression/movement_sparsity/importance_threshold": -0.001555540242960794, "compression/movement_sparsity/linear_layer_sparsity": 0.775386827152608, "compression/movement_sparsity/model_sparsity": 0.748749904271028, "compression_loss": 83.44877624511719, "distillation_loss": 4.925800800323486, "epoch": 3.19, "learning_rate": 3.7813468582699354e-05, "loss": 87.1656, "step": 3778, "task_loss": 2.326029062271118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7816196878676595, "compression/movement_sparsity/importance_threshold": -0.0015533605979118868, "compression/movement_sparsity/linear_layer_sparsity": 0.7757308990097408, "compression/movement_sparsity/model_sparsity": 0.7490821562013795, "compression_loss": 83.48115539550781, "distillation_loss": 3.7273478507995605, "epoch": 3.19, "learning_rate": 3.780877242415704e-05, "loss": 87.3106, "step": 3779, "task_loss": 2.0109968185424805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7819258284475468, "compression/movement_sparsity/importance_threshold": -0.0015511829899142824, "compression/movement_sparsity/linear_layer_sparsity": 0.7759474657423433, "compression/movement_sparsity/model_sparsity": 0.74929128320048, "compression_loss": 83.51342010498047, "distillation_loss": 3.654419422149658, "epoch": 3.2, "learning_rate": 3.7804076265614727e-05, "loss": 87.5465, "step": 3780, "task_loss": 2.9865164756774902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7822316827810035, "compression/movement_sparsity/importance_threshold": -0.0015490074180156375, "compression/movement_sparsity/linear_layer_sparsity": 0.7761611825988809, "compression/movement_sparsity/model_sparsity": 0.7494976582255255, "compression_loss": 83.54572296142578, "distillation_loss": 4.67124080657959, "epoch": 3.2, "learning_rate": 3.779938010707242e-05, "loss": 87.9051, "step": 3781, "task_loss": 3.3263254165649414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7825372510019151, "compression/movement_sparsity/importance_threshold": -0.0015468338812636142, "compression/movement_sparsity/linear_layer_sparsity": 0.7765329542974319, "compression/movement_sparsity/model_sparsity": 0.7498566584225277, "compression_loss": 83.57785034179688, "distillation_loss": 3.634326457977295, "epoch": 3.2, "learning_rate": 3.77946839485301e-05, "loss": 87.0449, "step": 3782, "task_loss": 2.7825701236724854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.782842533244166, "compression/movement_sparsity/importance_threshold": -0.001544662378705877, "compression/movement_sparsity/linear_layer_sparsity": 0.776760896685959, "compression/movement_sparsity/model_sparsity": 0.7500767702887762, "compression_loss": 83.6100845336914, "distillation_loss": 4.073667526245117, "epoch": 3.2, "learning_rate": 3.778998778998779e-05, "loss": 87.2854, "step": 3783, "task_loss": 2.5382847785949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7831475296416418, "compression/movement_sparsity/importance_threshold": -0.0015424929093900861, "compression/movement_sparsity/linear_layer_sparsity": 0.7770903137410674, "compression/movement_sparsity/model_sparsity": 0.7503948708546362, "compression_loss": 83.64221954345703, "distillation_loss": 4.030433177947998, "epoch": 3.2, "learning_rate": 3.778529163144548e-05, "loss": 86.7367, "step": 3784, "task_loss": 1.7101396322250366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7834522403282274, "compression/movement_sparsity/importance_threshold": -0.0015403254723639044, "compression/movement_sparsity/linear_layer_sparsity": 0.7773812157347119, "compression/movement_sparsity/model_sparsity": 0.7506757794698797, "compression_loss": 83.67436218261719, "distillation_loss": 5.128954887390137, "epoch": 3.2, "learning_rate": 3.778059547290317e-05, "loss": 87.789, "step": 3785, "task_loss": 2.883740186691284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7837566654378082, "compression/movement_sparsity/importance_threshold": -0.0015381600666749921, "compression/movement_sparsity/linear_layer_sparsity": 0.7776379907605826, "compression/movement_sparsity/model_sparsity": 0.7509237334836794, "compression_loss": 83.70645904541016, "distillation_loss": 4.733552932739258, "epoch": 3.2, "learning_rate": 3.777589931436085e-05, "loss": 87.7268, "step": 3786, "task_loss": 2.499995708465576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.784060805104269, "compression/movement_sparsity/importance_threshold": -0.001535996691371013, "compression/movement_sparsity/linear_layer_sparsity": 0.7778980568567208, "compression/movement_sparsity/model_sparsity": 0.7511748655093583, "compression_loss": 83.7385482788086, "distillation_loss": 3.013167142868042, "epoch": 3.2, "learning_rate": 3.777120315581854e-05, "loss": 87.3609, "step": 3787, "task_loss": 1.5770586729049683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7843646594614955, "compression/movement_sparsity/importance_threshold": -0.0015338353454996262, "compression/movement_sparsity/linear_layer_sparsity": 0.7781734216599357, "compression/movement_sparsity/model_sparsity": 0.7514407706844617, "compression_loss": 83.77058410644531, "distillation_loss": 4.462911128997803, "epoch": 3.2, "learning_rate": 3.776650699727623e-05, "loss": 87.2987, "step": 3788, "task_loss": 2.0275299549102783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7846682286433722, "compression/movement_sparsity/importance_threshold": -0.0015316760281084973, "compression/movement_sparsity/linear_layer_sparsity": 0.7784312698608936, "compression/movement_sparsity/model_sparsity": 0.7516897610064828, "compression_loss": 83.80261993408203, "distillation_loss": 4.686899185180664, "epoch": 3.2, "learning_rate": 3.776181083873392e-05, "loss": 87.0832, "step": 3789, "task_loss": 2.769773244857788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7849715127837844, "compression/movement_sparsity/importance_threshold": -0.0015295187382452858, "compression/movement_sparsity/linear_layer_sparsity": 0.7787146715530952, "compression/movement_sparsity/model_sparsity": 0.7519634269787117, "compression_loss": 83.8345947265625, "distillation_loss": 4.887333393096924, "epoch": 3.2, "learning_rate": 3.775711468019161e-05, "loss": 87.5981, "step": 3790, "task_loss": 2.021696090698242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7852745120166177, "compression/movement_sparsity/importance_threshold": -0.0015273634749576525, "compression/movement_sparsity/linear_layer_sparsity": 0.7788983991280285, "compression/movement_sparsity/model_sparsity": 0.752140842946234, "compression_loss": 83.86658477783203, "distillation_loss": 4.0106940269470215, "epoch": 3.2, "learning_rate": 3.775241852164929e-05, "loss": 87.5322, "step": 3791, "task_loss": 2.5875983238220215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7855772264757566, "compression/movement_sparsity/importance_threshold": -0.001525210237293263, "compression/movement_sparsity/linear_layer_sparsity": 0.7790392950928139, "compression/movement_sparsity/model_sparsity": 0.7522768987011822, "compression_loss": 83.89849853515625, "distillation_loss": 3.101524591445923, "epoch": 3.21, "learning_rate": 3.774772236310698e-05, "loss": 86.9115, "step": 3792, "task_loss": 1.0609230995178223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7858796562950868, "compression/movement_sparsity/importance_threshold": -0.0015230590242997742, "compression/movement_sparsity/linear_layer_sparsity": 0.7793424670549557, "compression/movement_sparsity/model_sparsity": 0.7525696557737585, "compression_loss": 83.93035888671875, "distillation_loss": 3.07387638092041, "epoch": 3.21, "learning_rate": 3.774302620456467e-05, "loss": 87.5048, "step": 3793, "task_loss": 1.3480068445205688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.786181801608493, "compression/movement_sparsity/importance_threshold": -0.0015209098350248522, "compression/movement_sparsity/linear_layer_sparsity": 0.7796104388742364, "compression/movement_sparsity/model_sparsity": 0.7528284219366691, "compression_loss": 83.96226501464844, "distillation_loss": 3.0989456176757812, "epoch": 3.21, "learning_rate": 3.7738330046022356e-05, "loss": 87.2904, "step": 3794, "task_loss": 1.2619060277938843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7864836625498608, "compression/movement_sparsity/importance_threshold": -0.0015187626685161555, "compression/movement_sparsity/linear_layer_sparsity": 0.7797955258042804, "compression/movement_sparsity/model_sparsity": 0.753007150561272, "compression_loss": 83.994140625, "distillation_loss": 2.288295269012451, "epoch": 3.21, "learning_rate": 3.773363388748004e-05, "loss": 87.561, "step": 3795, "task_loss": 1.9328750371932983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7867852392530748, "compression/movement_sparsity/importance_threshold": -0.0015166175238213496, "compression/movement_sparsity/linear_layer_sparsity": 0.7800894923090073, "compression/movement_sparsity/model_sparsity": 0.7532910184122148, "compression_loss": 84.02592468261719, "distillation_loss": 2.515443801879883, "epoch": 3.21, "learning_rate": 3.772893772893773e-05, "loss": 87.2945, "step": 3796, "task_loss": 1.5559481382369995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7870865318520205, "compression/movement_sparsity/importance_threshold": -0.001514474399988093, "compression/movement_sparsity/linear_layer_sparsity": 0.7803783671941537, "compression/movement_sparsity/model_sparsity": 0.7535699695563733, "compression_loss": 84.05763244628906, "distillation_loss": 4.593984603881836, "epoch": 3.21, "learning_rate": 3.772424157039542e-05, "loss": 87.9846, "step": 3797, "task_loss": 1.8002078533172607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7873875404805828, "compression/movement_sparsity/importance_threshold": -0.0015123332960640511, "compression/movement_sparsity/linear_layer_sparsity": 0.7808053835613615, "compression/movement_sparsity/model_sparsity": 0.7539823165977116, "compression_loss": 84.0893325805664, "distillation_loss": 3.1223931312561035, "epoch": 3.21, "learning_rate": 3.771954541185311e-05, "loss": 87.1181, "step": 3798, "task_loss": 1.1258089542388916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.787688265272647, "compression/movement_sparsity/importance_threshold": -0.0015101942110968825, "compression/movement_sparsity/linear_layer_sparsity": 0.7811578261841747, "compression/movement_sparsity/model_sparsity": 0.7543226517321909, "compression_loss": 84.12100219726562, "distillation_loss": 3.3722105026245117, "epoch": 3.21, "learning_rate": 3.7714849253310794e-05, "loss": 87.9999, "step": 3799, "task_loss": 1.204504370689392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7879887063620985, "compression/movement_sparsity/importance_threshold": -0.001508057144134249, "compression/movement_sparsity/linear_layer_sparsity": 0.7814441731457824, "compression/movement_sparsity/model_sparsity": 0.754599161794761, "compression_loss": 84.15264892578125, "distillation_loss": 4.446327209472656, "epoch": 3.21, "learning_rate": 3.771015309476848e-05, "loss": 88.1351, "step": 3800, "task_loss": 2.9547367095947266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7882888638828219, "compression/movement_sparsity/importance_threshold": -0.0015059220942238152, "compression/movement_sparsity/linear_layer_sparsity": 0.7815182937718069, "compression/movement_sparsity/model_sparsity": 0.7546707361492598, "compression_loss": 84.18425750732422, "distillation_loss": 3.3591256141662598, "epoch": 3.21, "learning_rate": 3.770545693622617e-05, "loss": 87.1238, "step": 3801, "task_loss": 1.8607633113861084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7885887379687029, "compression/movement_sparsity/importance_threshold": -0.0015037890604132397, "compression/movement_sparsity/linear_layer_sparsity": 0.7818515265605588, "compression/movement_sparsity/model_sparsity": 0.7549925213665741, "compression_loss": 84.21576690673828, "distillation_loss": 3.1628201007843018, "epoch": 3.21, "learning_rate": 3.770076077768386e-05, "loss": 88.3456, "step": 3802, "task_loss": 2.3069770336151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.788888328753626, "compression/movement_sparsity/importance_threshold": -0.0015016580417501878, "compression/movement_sparsity/linear_layer_sparsity": 0.7821353932952981, "compression/movement_sparsity/model_sparsity": 0.7552666364056989, "compression_loss": 84.2473373413086, "distillation_loss": 3.725578546524048, "epoch": 3.21, "learning_rate": 3.7696064619141546e-05, "loss": 88.0718, "step": 3803, "task_loss": 1.8034870624542236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7891876363714769, "compression/movement_sparsity/importance_threshold": -0.0014995290372823182, "compression/movement_sparsity/linear_layer_sparsity": 0.7824305402926209, "compression/movement_sparsity/model_sparsity": 0.7555516441956854, "compression_loss": 84.27890014648438, "distillation_loss": 3.9321656227111816, "epoch": 3.22, "learning_rate": 3.769136846059923e-05, "loss": 88.1337, "step": 3804, "task_loss": 3.0981173515319824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7894866609561405, "compression/movement_sparsity/importance_threshold": -0.0014974020460572944, "compression/movement_sparsity/linear_layer_sparsity": 0.7827469242325034, "compression/movement_sparsity/model_sparsity": 0.755857159373922, "compression_loss": 84.31043243408203, "distillation_loss": 3.323817729949951, "epoch": 3.22, "learning_rate": 3.768667230205692e-05, "loss": 87.9834, "step": 3805, "task_loss": 1.868282675743103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7897854026415021, "compression/movement_sparsity/importance_threshold": -0.0014952770671227775, "compression/movement_sparsity/linear_layer_sparsity": 0.7831248249532191, "compression/movement_sparsity/model_sparsity": 0.7562220780423226, "compression_loss": 84.34187316894531, "distillation_loss": 4.235575199127197, "epoch": 3.22, "learning_rate": 3.7681976143514605e-05, "loss": 88.0515, "step": 3806, "task_loss": 2.2716922760009766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7900838615614465, "compression/movement_sparsity/importance_threshold": -0.0014931540995264314, "compression/movement_sparsity/linear_layer_sparsity": 0.7833586221080555, "compression/movement_sparsity/model_sparsity": 0.7564478435456461, "compression_loss": 84.37332916259766, "distillation_loss": 5.080349922180176, "epoch": 3.22, "learning_rate": 3.76772799849723e-05, "loss": 88.0456, "step": 3807, "task_loss": 2.6644012928009033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7903820378498592, "compression/movement_sparsity/importance_threshold": -0.0014910331423159152, "compression/movement_sparsity/linear_layer_sparsity": 0.7836610070751333, "compression/movement_sparsity/model_sparsity": 0.7567398406588599, "compression_loss": 84.40473175048828, "distillation_loss": 2.180267333984375, "epoch": 3.22, "learning_rate": 3.767258382642998e-05, "loss": 88.3617, "step": 3808, "task_loss": 1.2051537036895752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.790679931640625, "compression/movement_sparsity/importance_threshold": -0.0014889141945388928, "compression/movement_sparsity/linear_layer_sparsity": 0.7838960920400744, "compression/movement_sparsity/model_sparsity": 0.7569668497320493, "compression_loss": 84.43607330322266, "distillation_loss": 4.672480583190918, "epoch": 3.22, "learning_rate": 3.766788766788767e-05, "loss": 88.4578, "step": 3809, "task_loss": 2.647188901901245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7909775430676292, "compression/movement_sparsity/importance_threshold": -0.0014867972552430253, "compression/movement_sparsity/linear_layer_sparsity": 0.7842091968338568, "compression/movement_sparsity/model_sparsity": 0.7572691984129423, "compression_loss": 84.46736145019531, "distillation_loss": 4.071205139160156, "epoch": 3.22, "learning_rate": 3.766319150934536e-05, "loss": 87.8356, "step": 3810, "task_loss": 2.40103816986084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7912748722647573, "compression/movement_sparsity/importance_threshold": -0.0014846823234759728, "compression/movement_sparsity/linear_layer_sparsity": 0.7846117567332436, "compression/movement_sparsity/model_sparsity": 0.757657929141366, "compression_loss": 84.49868774414062, "distillation_loss": 5.075333595275879, "epoch": 3.22, "learning_rate": 3.7658495350803044e-05, "loss": 89.1536, "step": 3811, "task_loss": 2.7665200233459473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7915719193658939, "compression/movement_sparsity/importance_threshold": -0.0014825693982854007, "compression/movement_sparsity/linear_layer_sparsity": 0.7848665404231191, "compression/movement_sparsity/model_sparsity": 0.757903960227688, "compression_loss": 84.52993774414062, "distillation_loss": 2.2682979106903076, "epoch": 3.22, "learning_rate": 3.765379919226073e-05, "loss": 88.0527, "step": 3812, "task_loss": 0.6993406414985657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7918686845049245, "compression/movement_sparsity/importance_threshold": -0.0014804584787189668, "compression/movement_sparsity/linear_layer_sparsity": 0.7851073012918548, "compression/movement_sparsity/model_sparsity": 0.7581364502199155, "compression_loss": 84.56118774414062, "distillation_loss": 3.441585063934326, "epoch": 3.22, "learning_rate": 3.7649103033718416e-05, "loss": 88.0912, "step": 3813, "task_loss": 1.6869131326675415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7921651678157339, "compression/movement_sparsity/importance_threshold": -0.0014783495638243374, "compression/movement_sparsity/linear_layer_sparsity": 0.7853227710010349, "compression/movement_sparsity/model_sparsity": 0.7583445178817229, "compression_loss": 84.59236907958984, "distillation_loss": 4.491844654083252, "epoch": 3.22, "learning_rate": 3.764440687517611e-05, "loss": 88.2481, "step": 3814, "task_loss": 2.5948410034179688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7924613694322076, "compression/movement_sparsity/importance_threshold": -0.0014762426526491708, "compression/movement_sparsity/linear_layer_sparsity": 0.7855797606619229, "compression/movement_sparsity/model_sparsity": 0.7585926791571669, "compression_loss": 84.62358093261719, "distillation_loss": 3.276313066482544, "epoch": 3.22, "learning_rate": 3.7639710716633796e-05, "loss": 88.3665, "step": 3815, "task_loss": 3.455113410949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7927572894882304, "compression/movement_sparsity/importance_threshold": -0.001474137744241131, "compression/movement_sparsity/linear_layer_sparsity": 0.7858365118394584, "compression/movement_sparsity/model_sparsity": 0.7588406101418949, "compression_loss": 84.65473937988281, "distillation_loss": 3.6998419761657715, "epoch": 3.23, "learning_rate": 3.763501455809148e-05, "loss": 87.8993, "step": 3816, "task_loss": 3.0387821197509766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7930529281176879, "compression/movement_sparsity/importance_threshold": -0.001472034837647877, "compression/movement_sparsity/linear_layer_sparsity": 0.7860904727617669, "compression/movement_sparsity/model_sparsity": 0.759085846725247, "compression_loss": 84.68580627441406, "distillation_loss": 3.243281841278076, "epoch": 3.23, "learning_rate": 3.763031839954917e-05, "loss": 89.0597, "step": 3817, "task_loss": 1.4655113220214844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7933482854544647, "compression/movement_sparsity/importance_threshold": -0.0014699339319170744, "compression/movement_sparsity/linear_layer_sparsity": 0.786280233965524, "compression/movement_sparsity/model_sparsity": 0.7592690890478814, "compression_loss": 84.71684265136719, "distillation_loss": 2.9045162200927734, "epoch": 3.23, "learning_rate": 3.7625622241006855e-05, "loss": 88.5299, "step": 3818, "task_loss": 1.6936595439910889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7936433616324464, "compression/movement_sparsity/importance_threshold": -0.0014678350260963818, "compression/movement_sparsity/linear_layer_sparsity": 0.7866030331075945, "compression/movement_sparsity/model_sparsity": 0.7595807990463753, "compression_loss": 84.74789428710938, "distillation_loss": 3.759547233581543, "epoch": 3.23, "learning_rate": 3.762092608246455e-05, "loss": 88.5588, "step": 3819, "task_loss": 2.0238308906555176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7939381567855177, "compression/movement_sparsity/importance_threshold": -0.0014657381192334645, "compression/movement_sparsity/linear_layer_sparsity": 0.7867395409786899, "compression/movement_sparsity/model_sparsity": 0.7597126174521512, "compression_loss": 84.7789306640625, "distillation_loss": 3.195195198059082, "epoch": 3.23, "learning_rate": 3.7616229923922234e-05, "loss": 88.233, "step": 3820, "task_loss": 3.567018747329712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.794232671047564, "compression/movement_sparsity/importance_threshold": -0.001463643210375982, "compression/movement_sparsity/linear_layer_sparsity": 0.7870506067398066, "compression/movement_sparsity/model_sparsity": 0.7600129971474234, "compression_loss": 84.80995178222656, "distillation_loss": 5.1790571212768555, "epoch": 3.23, "learning_rate": 3.761153376537992e-05, "loss": 88.6032, "step": 3821, "task_loss": 3.2834572792053223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7945269045524707, "compression/movement_sparsity/importance_threshold": -0.0014615502985715952, "compression/movement_sparsity/linear_layer_sparsity": 0.7873848173103046, "compression/movement_sparsity/model_sparsity": 0.7603357265566728, "compression_loss": 84.84092712402344, "distillation_loss": 2.8826146125793457, "epoch": 3.23, "learning_rate": 3.760683760683761e-05, "loss": 88.5083, "step": 3822, "task_loss": 2.097032308578491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7948208574341225, "compression/movement_sparsity/importance_threshold": -0.001459459382867968, "compression/movement_sparsity/linear_layer_sparsity": 0.7875913558179254, "compression/movement_sparsity/model_sparsity": 0.7605351698311701, "compression_loss": 84.87184143066406, "distillation_loss": 4.273573875427246, "epoch": 3.23, "learning_rate": 3.76021414482953e-05, "loss": 88.8153, "step": 3823, "task_loss": 1.6940274238586426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7951145298264047, "compression/movement_sparsity/importance_threshold": -0.0014573704623127605, "compression/movement_sparsity/linear_layer_sparsity": 0.787758997690718, "compression/movement_sparsity/model_sparsity": 0.7606970526899055, "compression_loss": 84.90277099609375, "distillation_loss": 3.1401455402374268, "epoch": 3.23, "learning_rate": 3.7597445289752987e-05, "loss": 87.9544, "step": 3824, "task_loss": 1.2622421979904175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7954079218632024, "compression/movement_sparsity/importance_threshold": -0.0014552835359536364, "compression/movement_sparsity/linear_layer_sparsity": 0.7879940468831561, "compression/movement_sparsity/model_sparsity": 0.7609240272194875, "compression_loss": 84.93363189697266, "distillation_loss": 3.4192380905151367, "epoch": 3.23, "learning_rate": 3.7592749131210666e-05, "loss": 87.9407, "step": 3825, "task_loss": 1.2758463621139526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.795701033678401, "compression/movement_sparsity/importance_threshold": -0.001453198602838256, "compression/movement_sparsity/linear_layer_sparsity": 0.7882164564579003, "compression/movement_sparsity/model_sparsity": 0.7611387963411272, "compression_loss": 84.96446990966797, "distillation_loss": 3.320040225982666, "epoch": 3.23, "learning_rate": 3.758805297266836e-05, "loss": 88.4277, "step": 3826, "task_loss": 1.8456217050552368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.795993865405885, "compression/movement_sparsity/importance_threshold": -0.0014511156620142839, "compression/movement_sparsity/linear_layer_sparsity": 0.7884828662112207, "compression/movement_sparsity/model_sparsity": 0.7613960540998489, "compression_loss": 84.9952621459961, "distillation_loss": 4.0764875411987305, "epoch": 3.23, "learning_rate": 3.7583356814126046e-05, "loss": 88.5019, "step": 3827, "task_loss": 2.3633382320404053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7962864171795404, "compression/movement_sparsity/importance_threshold": -0.0014490347125293776, "compression/movement_sparsity/linear_layer_sparsity": 0.7887492998128764, "compression/movement_sparsity/model_sparsity": 0.7616533348876421, "compression_loss": 85.02597045898438, "distillation_loss": 5.774178504943848, "epoch": 3.24, "learning_rate": 3.757866065558374e-05, "loss": 89.0323, "step": 3828, "task_loss": 3.8798060417175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7965786891332516, "compression/movement_sparsity/importance_threshold": -0.0014469557534312035, "compression/movement_sparsity/linear_layer_sparsity": 0.7891236113591338, "compression/movement_sparsity/model_sparsity": 0.7620147876807686, "compression_loss": 85.05673217773438, "distillation_loss": 3.3361620903015137, "epoch": 3.24, "learning_rate": 3.757396449704142e-05, "loss": 89.6177, "step": 3829, "task_loss": 1.395440697669983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7968706814009041, "compression/movement_sparsity/importance_threshold": -0.00144487878376742, "compression/movement_sparsity/linear_layer_sparsity": 0.7892873421049418, "compression/movement_sparsity/model_sparsity": 0.7621728937717634, "compression_loss": 85.08746337890625, "distillation_loss": 1.950059175491333, "epoch": 3.24, "learning_rate": 3.756926833849911e-05, "loss": 88.1312, "step": 3830, "task_loss": 1.6815049648284912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7971623941163832, "compression/movement_sparsity/importance_threshold": -0.0014428038025856892, "compression/movement_sparsity/linear_layer_sparsity": 0.7895377019286244, "compression/movement_sparsity/model_sparsity": 0.7624146529653056, "compression_loss": 85.11808776855469, "distillation_loss": 3.224099636077881, "epoch": 3.24, "learning_rate": 3.75645721799568e-05, "loss": 88.5572, "step": 3831, "task_loss": 1.773796796798706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7974538274135735, "compression/movement_sparsity/importance_threshold": -0.0014407308089336764, "compression/movement_sparsity/linear_layer_sparsity": 0.7896953751972734, "compression/movement_sparsity/model_sparsity": 0.7625669096721168, "compression_loss": 85.14875793457031, "distillation_loss": 6.404293060302734, "epoch": 3.24, "learning_rate": 3.7559876021414484e-05, "loss": 89.5353, "step": 3832, "task_loss": 2.422086715698242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7977449814263609, "compression/movement_sparsity/importance_threshold": -0.0014386598018590385, "compression/movement_sparsity/linear_layer_sparsity": 0.7899954468758299, "compression/movement_sparsity/model_sparsity": 0.7628566729653864, "compression_loss": 85.17940521240234, "distillation_loss": 2.7856106758117676, "epoch": 3.24, "learning_rate": 3.755517986287217e-05, "loss": 88.8417, "step": 3833, "task_loss": 1.1924329996109009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7980358562886297, "compression/movement_sparsity/importance_threshold": -0.0014365907804094408, "compression/movement_sparsity/linear_layer_sparsity": 0.7901679776573532, "compression/movement_sparsity/model_sparsity": 0.7630232767837977, "compression_loss": 85.20999908447266, "distillation_loss": 5.558193683624268, "epoch": 3.24, "learning_rate": 3.755048370432986e-05, "loss": 89.7805, "step": 3834, "task_loss": 3.9305381774902344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7983264521342657, "compression/movement_sparsity/importance_threshold": -0.0014345237436325445, "compression/movement_sparsity/linear_layer_sparsity": 0.7904915876428229, "compression/movement_sparsity/model_sparsity": 0.7633357697707257, "compression_loss": 85.2405776977539, "distillation_loss": 3.940725326538086, "epoch": 3.24, "learning_rate": 3.754578754578755e-05, "loss": 88.8403, "step": 3835, "task_loss": 2.099304676055908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7986167690971536, "compression/movement_sparsity/importance_threshold": -0.0014324586905760106, "compression/movement_sparsity/linear_layer_sparsity": 0.7907356515059938, "compression/movement_sparsity/model_sparsity": 0.7635714492893684, "compression_loss": 85.27110290527344, "distillation_loss": 4.540655136108398, "epoch": 3.24, "learning_rate": 3.7541091387245236e-05, "loss": 89.0295, "step": 3836, "task_loss": 2.282222032546997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.798906807311179, "compression/movement_sparsity/importance_threshold": -0.0014303956202875012, "compression/movement_sparsity/linear_layer_sparsity": 0.7909744568112372, "compression/movement_sparsity/model_sparsity": 0.7638020508977256, "compression_loss": 85.3016357421875, "distillation_loss": 3.306525707244873, "epoch": 3.24, "learning_rate": 3.753639522870292e-05, "loss": 88.5438, "step": 3837, "task_loss": 1.3490004539489746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7991965669102266, "compression/movement_sparsity/importance_threshold": -0.001428334531814679, "compression/movement_sparsity/linear_layer_sparsity": 0.7912223721805544, "compression/movement_sparsity/model_sparsity": 0.76404144961143, "compression_loss": 85.33207702636719, "distillation_loss": 4.582644462585449, "epoch": 3.24, "learning_rate": 3.753169907016061e-05, "loss": 89.8546, "step": 3838, "task_loss": 2.2369418144226074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7994860480281816, "compression/movement_sparsity/importance_threshold": -0.001426275424205206, "compression/movement_sparsity/linear_layer_sparsity": 0.7915219311199025, "compression/movement_sparsity/model_sparsity": 0.7643307177796603, "compression_loss": 85.36251831054688, "distillation_loss": 4.300042629241943, "epoch": 3.24, "learning_rate": 3.7527002911618295e-05, "loss": 88.8643, "step": 3839, "task_loss": 2.258991241455078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7997752507989294, "compression/movement_sparsity/importance_threshold": -0.0014242182965067416, "compression/movement_sparsity/linear_layer_sparsity": 0.7917814725526647, "compression/movement_sparsity/model_sparsity": 0.7645813431657643, "compression_loss": 85.3929672241211, "distillation_loss": 3.5070152282714844, "epoch": 3.25, "learning_rate": 3.752230675307599e-05, "loss": 88.7642, "step": 3840, "task_loss": 2.0244126319885254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8000641753563549, "compression/movement_sparsity/importance_threshold": -0.0014221631477669512, "compression/movement_sparsity/linear_layer_sparsity": 0.7921885636357531, "compression/movement_sparsity/model_sparsity": 0.7649744494177899, "compression_loss": 85.42337036132812, "distillation_loss": 7.196254253387451, "epoch": 3.25, "learning_rate": 3.7517610594533675e-05, "loss": 90.0865, "step": 3841, "task_loss": 4.11060905456543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8003528218343435, "compression/movement_sparsity/importance_threshold": -0.0014201099770334933, "compression/movement_sparsity/linear_layer_sparsity": 0.7923142920593057, "compression/movement_sparsity/model_sparsity": 0.7650958586832076, "compression_loss": 85.4537124633789, "distillation_loss": 5.17319917678833, "epoch": 3.25, "learning_rate": 3.751291443599136e-05, "loss": 89.8813, "step": 3842, "task_loss": 3.1535584926605225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8006411903667799, "compression/movement_sparsity/importance_threshold": -0.0014180587833540325, "compression/movement_sparsity/linear_layer_sparsity": 0.7925344479663666, "compression/movement_sparsity/model_sparsity": 0.7653084515575822, "compression_loss": 85.48411560058594, "distillation_loss": 5.4898271560668945, "epoch": 3.25, "learning_rate": 3.750821827744905e-05, "loss": 90.1106, "step": 3843, "task_loss": 2.7833287715911865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8009292810875499, "compression/movement_sparsity/importance_threshold": -0.0014160095657762281, "compression/movement_sparsity/linear_layer_sparsity": 0.7927355252052102, "compression/movement_sparsity/model_sparsity": 0.7655026211746855, "compression_loss": 85.5144271850586, "distillation_loss": 3.5160980224609375, "epoch": 3.25, "learning_rate": 3.7503522118906734e-05, "loss": 89.2068, "step": 3844, "task_loss": 2.5882225036621094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8012170941305379, "compression/movement_sparsity/importance_threshold": -0.0014139623233477457, "compression/movement_sparsity/linear_layer_sparsity": 0.7930126667094028, "compression/movement_sparsity/model_sparsity": 0.7657702420156223, "compression_loss": 85.54470825195312, "distillation_loss": 3.3571043014526367, "epoch": 3.25, "learning_rate": 3.749882596036443e-05, "loss": 89.5516, "step": 3845, "task_loss": 1.606790542602539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8015046296296298, "compression/movement_sparsity/importance_threshold": -0.001411917055116241, "compression/movement_sparsity/linear_layer_sparsity": 0.7931505816322793, "compression/movement_sparsity/model_sparsity": 0.765903419136622, "compression_loss": 85.57492065429688, "distillation_loss": 3.828917980194092, "epoch": 3.25, "learning_rate": 3.7494129801822106e-05, "loss": 89.3882, "step": 3846, "task_loss": 1.9045441150665283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8017918877187099, "compression/movement_sparsity/importance_threshold": -0.001409873760129383, "compression/movement_sparsity/linear_layer_sparsity": 0.7934027539294425, "compression/movement_sparsity/model_sparsity": 0.7661469285396049, "compression_loss": 85.60513305664062, "distillation_loss": 5.104122638702393, "epoch": 3.25, "learning_rate": 3.74894336432798e-05, "loss": 89.477, "step": 3847, "task_loss": 3.6935737133026123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8020788685316641, "compression/movement_sparsity/importance_threshold": -0.0014078324374348274, "compression/movement_sparsity/linear_layer_sparsity": 0.7935546082042853, "compression/movement_sparsity/model_sparsity": 0.7662935661529483, "compression_loss": 85.63529968261719, "distillation_loss": 4.813873291015625, "epoch": 3.25, "learning_rate": 3.7484737484737486e-05, "loss": 89.7257, "step": 3848, "task_loss": 2.863041877746582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8023655722023769, "compression/movement_sparsity/importance_threshold": -0.0014057930860802417, "compression/movement_sparsity/linear_layer_sparsity": 0.7937520366478322, "compression/movement_sparsity/model_sparsity": 0.7664842123220985, "compression_loss": 85.66542053222656, "distillation_loss": 4.626608848571777, "epoch": 3.25, "learning_rate": 3.748004132619517e-05, "loss": 90.0279, "step": 3849, "task_loss": 3.0130233764648438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.802651998864734, "compression/movement_sparsity/importance_threshold": -0.0014037557051132825, "compression/movement_sparsity/linear_layer_sparsity": 0.7940502243079023, "compression/movement_sparsity/model_sparsity": 0.7667721563187126, "compression_loss": 85.69554901123047, "distillation_loss": 4.43391227722168, "epoch": 3.25, "learning_rate": 3.7475345167652865e-05, "loss": 89.487, "step": 3850, "task_loss": 2.022402763366699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8029381486526203, "compression/movement_sparsity/importance_threshold": -0.0014017202935816151, "compression/movement_sparsity/linear_layer_sparsity": 0.7942077783348749, "compression/movement_sparsity/model_sparsity": 0.7669242978801659, "compression_loss": 85.72562408447266, "distillation_loss": 4.669116020202637, "epoch": 3.26, "learning_rate": 3.7470649009110545e-05, "loss": 90.2253, "step": 3851, "task_loss": 2.645038604736328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8032240216999207, "compression/movement_sparsity/importance_threshold": -0.0013996868505329017, "compression/movement_sparsity/linear_layer_sparsity": 0.794365153499333, "compression/movement_sparsity/model_sparsity": 0.7670762667235822, "compression_loss": 85.75566101074219, "distillation_loss": 5.220551490783691, "epoch": 3.26, "learning_rate": 3.746595285056824e-05, "loss": 89.8635, "step": 3852, "task_loss": 2.592268228530884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8035096181405209, "compression/movement_sparsity/importance_threshold": -0.0013976553750147998, "compression/movement_sparsity/linear_layer_sparsity": 0.7946318136601738, "compression/movement_sparsity/model_sparsity": 0.7673337662875556, "compression_loss": 85.78571319580078, "distillation_loss": 4.303621292114258, "epoch": 3.26, "learning_rate": 3.7461256692025924e-05, "loss": 89.725, "step": 3853, "task_loss": 1.5967799425125122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8037949381083055, "compression/movement_sparsity/importance_threshold": -0.0013956258660749758, "compression/movement_sparsity/linear_layer_sparsity": 0.7949110180453676, "compression/movement_sparsity/model_sparsity": 0.7676033791431848, "compression_loss": 85.8156967163086, "distillation_loss": 3.539348602294922, "epoch": 3.26, "learning_rate": 3.745656053348362e-05, "loss": 90.4692, "step": 3854, "task_loss": 1.7544727325439453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.80407998173716, "compression/movement_sparsity/importance_threshold": -0.0013935983227610898, "compression/movement_sparsity/linear_layer_sparsity": 0.7950758100420952, "compression/movement_sparsity/model_sparsity": 0.7677625100278653, "compression_loss": 85.84565734863281, "distillation_loss": 3.6888315677642822, "epoch": 3.26, "learning_rate": 3.74518643749413e-05, "loss": 89.3109, "step": 3855, "task_loss": 2.847992420196533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8043647491609693, "compression/movement_sparsity/importance_threshold": -0.001391572744120803, "compression/movement_sparsity/linear_layer_sparsity": 0.7952948093048954, "compression/movement_sparsity/model_sparsity": 0.7679739859922678, "compression_loss": 85.87557220458984, "distillation_loss": 2.641328811645508, "epoch": 3.26, "learning_rate": 3.744716821639899e-05, "loss": 89.4373, "step": 3856, "task_loss": 2.7741470336914062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8046492405136187, "compression/movement_sparsity/importance_threshold": -0.001389549129201779, "compression/movement_sparsity/linear_layer_sparsity": 0.7955650824885299, "compression/movement_sparsity/model_sparsity": 0.7682349744605869, "compression_loss": 85.90550994873047, "distillation_loss": 4.188631057739258, "epoch": 3.26, "learning_rate": 3.7442472057856676e-05, "loss": 89.486, "step": 3857, "task_loss": 2.764583110809326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8049334559289932, "compression/movement_sparsity/importance_threshold": -0.001387527477051679, "compression/movement_sparsity/linear_layer_sparsity": 0.7958035300687443, "compression/movement_sparsity/model_sparsity": 0.7684652306328703, "compression_loss": 85.93533325195312, "distillation_loss": 1.6426973342895508, "epoch": 3.26, "learning_rate": 3.743777589931436e-05, "loss": 88.6851, "step": 3858, "task_loss": 1.1686151027679443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8052173955409783, "compression/movement_sparsity/importance_threshold": -0.0013855077867181622, "compression/movement_sparsity/linear_layer_sparsity": 0.7960511115613677, "compression/movement_sparsity/model_sparsity": 0.7687043069395724, "compression_loss": 85.96515655517578, "distillation_loss": 4.935452938079834, "epoch": 3.26, "learning_rate": 3.743307974077205e-05, "loss": 90.0924, "step": 3859, "task_loss": 2.0204555988311768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8055010594834585, "compression/movement_sparsity/importance_threshold": -0.001383490057248895, "compression/movement_sparsity/linear_layer_sparsity": 0.7961891695742557, "compression/movement_sparsity/model_sparsity": 0.7688376222350015, "compression_loss": 85.994873046875, "distillation_loss": 3.5719261169433594, "epoch": 3.26, "learning_rate": 3.7428383582229735e-05, "loss": 90.065, "step": 3860, "task_loss": 1.8184473514556885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8057844478903194, "compression/movement_sparsity/importance_threshold": -0.001381474287691537, "compression/movement_sparsity/linear_layer_sparsity": 0.7963829969191767, "compression/movement_sparsity/model_sparsity": 0.7690247910143418, "compression_loss": 86.02465057373047, "distillation_loss": 3.6207492351531982, "epoch": 3.26, "learning_rate": 3.742368742368743e-05, "loss": 89.1698, "step": 3861, "task_loss": 2.87886905670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8060675608954462, "compression/movement_sparsity/importance_threshold": -0.001379460477093749, "compression/movement_sparsity/linear_layer_sparsity": 0.7966931206710501, "compression/movement_sparsity/model_sparsity": 0.7693242610612863, "compression_loss": 86.05440521240234, "distillation_loss": 2.938354015350342, "epoch": 3.26, "learning_rate": 3.7418991265145115e-05, "loss": 89.1363, "step": 3862, "task_loss": 1.7569653987884521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8063503986327237, "compression/movement_sparsity/importance_threshold": -0.0013774486245031947, "compression/movement_sparsity/linear_layer_sparsity": 0.7969438859164324, "compression/movement_sparsity/model_sparsity": 0.7695664117490454, "compression_loss": 86.08409118652344, "distillation_loss": 2.8263556957244873, "epoch": 3.27, "learning_rate": 3.74142951066028e-05, "loss": 90.3466, "step": 3863, "task_loss": 1.0375018119812012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8066329612360374, "compression/movement_sparsity/importance_threshold": -0.0013754387289675344, "compression/movement_sparsity/linear_layer_sparsity": 0.7970684696198919, "compression/movement_sparsity/model_sparsity": 0.7696867156190268, "compression_loss": 86.11370086669922, "distillation_loss": 3.137869119644165, "epoch": 3.27, "learning_rate": 3.740959894806049e-05, "loss": 89.4147, "step": 3864, "task_loss": 1.4998860359191895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8069152488392721, "compression/movement_sparsity/importance_threshold": -0.001373430789534432, "compression/movement_sparsity/linear_layer_sparsity": 0.7973730486338148, "compression/movement_sparsity/model_sparsity": 0.7699808314068267, "compression_loss": 86.14334106445312, "distillation_loss": 3.6086950302124023, "epoch": 3.27, "learning_rate": 3.7404902789518174e-05, "loss": 89.4453, "step": 3865, "task_loss": 1.6405483484268188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8071972615763134, "compression/movement_sparsity/importance_threshold": -0.0013714248052515456, "compression/movement_sparsity/linear_layer_sparsity": 0.7976035547183836, "compression/movement_sparsity/model_sparsity": 0.770203418898271, "compression_loss": 86.1729736328125, "distillation_loss": 3.8072001934051514, "epoch": 3.27, "learning_rate": 3.740020663097587e-05, "loss": 89.621, "step": 3866, "task_loss": 2.574399471282959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8074789995810459, "compression/movement_sparsity/importance_threshold": -0.0013694207751665426, "compression/movement_sparsity/linear_layer_sparsity": 0.7977920877328742, "compression/movement_sparsity/model_sparsity": 0.7703854752237185, "compression_loss": 86.2025375366211, "distillation_loss": 3.6322059631347656, "epoch": 3.27, "learning_rate": 3.739551047243355e-05, "loss": 90.0896, "step": 3867, "task_loss": 1.9810761213302612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8077604629873552, "compression/movement_sparsity/importance_threshold": -0.001367418698327079, "compression/movement_sparsity/linear_layer_sparsity": 0.7980606557605368, "compression/movement_sparsity/model_sparsity": 0.7706448171134189, "compression_loss": 86.23204040527344, "distillation_loss": 4.025532245635986, "epoch": 3.27, "learning_rate": 3.739081431389124e-05, "loss": 90.1818, "step": 3868, "task_loss": 1.4648290872573853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8080416519291259, "compression/movement_sparsity/importance_threshold": -0.0013654185737808217, "compression/movement_sparsity/linear_layer_sparsity": 0.7983942939709883, "compression/movement_sparsity/model_sparsity": 0.7709669938249502, "compression_loss": 86.26158905029297, "distillation_loss": 5.127395153045654, "epoch": 3.27, "learning_rate": 3.7386118155348926e-05, "loss": 90.0869, "step": 3869, "task_loss": 1.971148133277893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8083225665402436, "compression/movement_sparsity/importance_threshold": -0.0013634204005754302, "compression/movement_sparsity/linear_layer_sparsity": 0.7986174189957905, "compression/movement_sparsity/model_sparsity": 0.7711824538187376, "compression_loss": 86.29106140136719, "distillation_loss": 5.92673921585083, "epoch": 3.27, "learning_rate": 3.738142199680661e-05, "loss": 90.5681, "step": 3870, "task_loss": 2.2187798023223877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8086032069545934, "compression/movement_sparsity/importance_threshold": -0.0013614241777585648, "compression/movement_sparsity/linear_layer_sparsity": 0.7989178126268731, "compression/movement_sparsity/model_sparsity": 0.7714725280044737, "compression_loss": 86.32057189941406, "distillation_loss": 3.5463409423828125, "epoch": 3.27, "learning_rate": 3.7376725838264305e-05, "loss": 90.3072, "step": 3871, "task_loss": 2.3925466537475586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8088835733060602, "compression/movement_sparsity/importance_threshold": -0.0013594299043778908, "compression/movement_sparsity/linear_layer_sparsity": 0.7990326423612065, "compression/movement_sparsity/model_sparsity": 0.7715834129841751, "compression_loss": 86.34999084472656, "distillation_loss": 5.052990913391113, "epoch": 3.27, "learning_rate": 3.7372029679721985e-05, "loss": 90.7633, "step": 3872, "task_loss": 3.1327579021453857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8091636657285296, "compression/movement_sparsity/importance_threshold": -0.001357437579481066, "compression/movement_sparsity/linear_layer_sparsity": 0.7991720001083669, "compression/movement_sparsity/model_sparsity": 0.771717983364006, "compression_loss": 86.37943267822266, "distillation_loss": 2.7614786624908447, "epoch": 3.27, "learning_rate": 3.736733352117968e-05, "loss": 89.5275, "step": 3873, "task_loss": 1.9580072164535522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8094434843558862, "compression/movement_sparsity/importance_threshold": -0.0013554472021157556, "compression/movement_sparsity/linear_layer_sparsity": 0.7993846080173143, "compression/movement_sparsity/model_sparsity": 0.7719232875372226, "compression_loss": 86.40882110595703, "distillation_loss": 3.308140754699707, "epoch": 3.27, "learning_rate": 3.7362637362637365e-05, "loss": 89.893, "step": 3874, "task_loss": 1.7100189924240112 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8097230293220155, "compression/movement_sparsity/importance_threshold": -0.001353458771329621, "compression/movement_sparsity/linear_layer_sparsity": 0.7997155394417124, "compression/movement_sparsity/model_sparsity": 0.7722428504491285, "compression_loss": 86.43811798095703, "distillation_loss": 3.6927294731140137, "epoch": 3.28, "learning_rate": 3.735794120409505e-05, "loss": 90.0024, "step": 3875, "task_loss": 1.8627514839172363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8100023007608024, "compression/movement_sparsity/importance_threshold": -0.001351472286170323, "compression/movement_sparsity/linear_layer_sparsity": 0.8000089812830633, "compression/movement_sparsity/model_sparsity": 0.7725262116604964, "compression_loss": 86.4674072265625, "distillation_loss": 4.589064598083496, "epoch": 3.28, "learning_rate": 3.735324504555274e-05, "loss": 89.8983, "step": 3876, "task_loss": 2.6589133739471436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8102812988061322, "compression/movement_sparsity/importance_threshold": -0.0013494877456855228, "compression/movement_sparsity/linear_layer_sparsity": 0.8002946485671157, "compression/movement_sparsity/model_sparsity": 0.7728020653945261, "compression_loss": 86.49666595458984, "distillation_loss": 4.791359901428223, "epoch": 3.28, "learning_rate": 3.7348548887010424e-05, "loss": 90.4731, "step": 3877, "task_loss": 2.6851353645324707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8105600235918899, "compression/movement_sparsity/importance_threshold": -0.001347505148922885, "compression/movement_sparsity/linear_layer_sparsity": 0.8005527710239292, "compression/movement_sparsity/model_sparsity": 0.7730513205508706, "compression_loss": 86.5259017944336, "distillation_loss": 3.0303797721862793, "epoch": 3.28, "learning_rate": 3.734385272846812e-05, "loss": 89.8667, "step": 3878, "task_loss": 2.1783087253570557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8108384752519607, "compression/movement_sparsity/importance_threshold": -0.0013455244949300699, "compression/movement_sparsity/linear_layer_sparsity": 0.8006880149332551, "compression/movement_sparsity/model_sparsity": 0.7731819184158523, "compression_loss": 86.5550765991211, "distillation_loss": 4.207998275756836, "epoch": 3.28, "learning_rate": 3.73391565699258e-05, "loss": 90.853, "step": 3879, "task_loss": 1.8584429025650024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.81111665392023, "compression/movement_sparsity/importance_threshold": -0.0013435457827547376, "compression/movement_sparsity/linear_layer_sparsity": 0.8007980392280313, "compression/movement_sparsity/model_sparsity": 0.7732881630376285, "compression_loss": 86.58430480957031, "distillation_loss": 4.963778495788574, "epoch": 3.28, "learning_rate": 3.7334460411383496e-05, "loss": 90.7406, "step": 3880, "task_loss": 3.5687367916107178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8113945597305825, "compression/movement_sparsity/importance_threshold": -0.0013415690114445536, "compression/movement_sparsity/linear_layer_sparsity": 0.8010137593447317, "compression/movement_sparsity/model_sparsity": 0.7734964725046876, "compression_loss": 86.6134262084961, "distillation_loss": 3.40206241607666, "epoch": 3.28, "learning_rate": 3.7329764252841176e-05, "loss": 90.1198, "step": 3881, "task_loss": 1.1703693866729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8116721928169038, "compression/movement_sparsity/importance_threshold": -0.0013395941800471746, "compression/movement_sparsity/linear_layer_sparsity": 0.8012700931763999, "compression/movement_sparsity/model_sparsity": 0.7737440004806628, "compression_loss": 86.6425552368164, "distillation_loss": 2.99239444732666, "epoch": 3.28, "learning_rate": 3.732506809429886e-05, "loss": 90.3441, "step": 3882, "task_loss": 1.786189079284668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8119495533130786, "compression/movement_sparsity/importance_threshold": -0.0013376212876102687, "compression/movement_sparsity/linear_layer_sparsity": 0.8015370991381021, "compression/movement_sparsity/model_sparsity": 0.7740018339661742, "compression_loss": 86.6716537475586, "distillation_loss": 4.807524681091309, "epoch": 3.28, "learning_rate": 3.7320371935756555e-05, "loss": 90.8984, "step": 3883, "task_loss": 3.214905023574829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8122266413529925, "compression/movement_sparsity/importance_threshold": -0.0013356503331814918, "compression/movement_sparsity/linear_layer_sparsity": 0.8018223133037843, "compression/movement_sparsity/model_sparsity": 0.7742772501478438, "compression_loss": 86.70076751708984, "distillation_loss": 3.7104978561401367, "epoch": 3.28, "learning_rate": 3.731567577721424e-05, "loss": 90.6603, "step": 3884, "task_loss": 1.6102288961410522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8125034570705301, "compression/movement_sparsity/importance_threshold": -0.001333681315808511, "compression/movement_sparsity/linear_layer_sparsity": 0.8019398736725063, "compression/movement_sparsity/model_sparsity": 0.7743907719562422, "compression_loss": 86.72978973388672, "distillation_loss": 3.6302380561828613, "epoch": 3.28, "learning_rate": 3.731097961867193e-05, "loss": 90.511, "step": 3885, "task_loss": 1.8573240041732788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.812780000599577, "compression/movement_sparsity/importance_threshold": -0.001331714234538984, "compression/movement_sparsity/linear_layer_sparsity": 0.8020659121244174, "compression/movement_sparsity/model_sparsity": 0.7745124805995904, "compression_loss": 86.7587661743164, "distillation_loss": 4.016851425170898, "epoch": 3.28, "learning_rate": 3.7306283460129614e-05, "loss": 90.6664, "step": 3886, "task_loss": 2.4021944999694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.813056272074018, "compression/movement_sparsity/importance_threshold": -0.0013297490884205754, "compression/movement_sparsity/linear_layer_sparsity": 0.802288894059208, "compression/movement_sparsity/model_sparsity": 0.7747278024189483, "compression_loss": 86.78775024414062, "distillation_loss": 4.915066242218018, "epoch": 3.29, "learning_rate": 3.730158730158731e-05, "loss": 90.9107, "step": 3887, "task_loss": 2.525101900100708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8133322716277386, "compression/movement_sparsity/importance_threshold": -0.0013277858765009453, "compression/movement_sparsity/linear_layer_sparsity": 0.8025110651505993, "compression/movement_sparsity/model_sparsity": 0.7749423412498723, "compression_loss": 86.81668090820312, "distillation_loss": 3.129725694656372, "epoch": 3.29, "learning_rate": 3.7296891143044994e-05, "loss": 90.1534, "step": 3888, "task_loss": 1.8941864967346191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8136079993946235, "compression/movement_sparsity/importance_threshold": -0.0013258245978277575, "compression/movement_sparsity/linear_layer_sparsity": 0.8028696367955774, "compression/movement_sparsity/model_sparsity": 0.7752885948557499, "compression_loss": 86.84556579589844, "distillation_loss": 4.211857795715332, "epoch": 3.29, "learning_rate": 3.729219498450267e-05, "loss": 90.7799, "step": 3889, "task_loss": 2.632563829421997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8138834555085583, "compression/movement_sparsity/importance_threshold": -0.0013238652514486705, "compression/movement_sparsity/linear_layer_sparsity": 0.8031028973628702, "compression/movement_sparsity/model_sparsity": 0.7755138422049627, "compression_loss": 86.87449645996094, "distillation_loss": 3.6581764221191406, "epoch": 3.29, "learning_rate": 3.7287498825960366e-05, "loss": 90.5334, "step": 3890, "task_loss": 2.4628260135650635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8141586401034278, "compression/movement_sparsity/importance_threshold": -0.0013219078364113505, "compression/movement_sparsity/linear_layer_sparsity": 0.8032439006451643, "compression/movement_sparsity/model_sparsity": 0.7756500015907332, "compression_loss": 86.90331268310547, "distillation_loss": 3.94869327545166, "epoch": 3.29, "learning_rate": 3.728280266741805e-05, "loss": 90.5671, "step": 3891, "task_loss": 2.4403576850891113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8144335533131172, "compression/movement_sparsity/importance_threshold": -0.001319952351763457, "compression/movement_sparsity/linear_layer_sparsity": 0.8034599904110613, "compression/movement_sparsity/model_sparsity": 0.7758586680084018, "compression_loss": 86.93211364746094, "distillation_loss": 3.2101314067840576, "epoch": 3.29, "learning_rate": 3.7278106508875746e-05, "loss": 90.7328, "step": 3892, "task_loss": 2.4134271144866943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8147081952715118, "compression/movement_sparsity/importance_threshold": -0.0013179987965526501, "compression/movement_sparsity/linear_layer_sparsity": 0.8037413411464296, "compression/movement_sparsity/model_sparsity": 0.776130353480474, "compression_loss": 86.96090698242188, "distillation_loss": 4.666472911834717, "epoch": 3.29, "learning_rate": 3.7273410350333425e-05, "loss": 90.1804, "step": 3893, "task_loss": 2.283299684524536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8149825661124966, "compression/movement_sparsity/importance_threshold": -0.0013160471698265954, "compression/movement_sparsity/linear_layer_sparsity": 0.804094069949266, "compression/movement_sparsity/model_sparsity": 0.7764709649638123, "compression_loss": 86.98971557617188, "distillation_loss": 5.364029884338379, "epoch": 3.29, "learning_rate": 3.726871419179112e-05, "loss": 91.1553, "step": 3894, "task_loss": 2.3699378967285156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.815256665969957, "compression/movement_sparsity/importance_threshold": -0.0013140974706329503, "compression/movement_sparsity/linear_layer_sparsity": 0.8043013835277831, "compression/movement_sparsity/model_sparsity": 0.7766711566831361, "compression_loss": 87.01842498779297, "distillation_loss": 2.744577407836914, "epoch": 3.29, "learning_rate": 3.7264018033248805e-05, "loss": 90.1938, "step": 3895, "task_loss": 1.294248104095459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8155304949777777, "compression/movement_sparsity/importance_threshold": -0.0013121496980193822, "compression/movement_sparsity/linear_layer_sparsity": 0.804666084194926, "compression/movement_sparsity/model_sparsity": 0.7770233287604122, "compression_loss": 87.04713439941406, "distillation_loss": 2.850207567214966, "epoch": 3.29, "learning_rate": 3.725932187470649e-05, "loss": 90.3609, "step": 3896, "task_loss": 1.3194524049758911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8158040532698443, "compression/movement_sparsity/importance_threshold": -0.0013102038510335477, "compression/movement_sparsity/linear_layer_sparsity": 0.8050384878743616, "compression/movement_sparsity/model_sparsity": 0.7773829392278115, "compression_loss": 87.07585144042969, "distillation_loss": 3.502723455429077, "epoch": 3.29, "learning_rate": 3.7254625716164184e-05, "loss": 90.4221, "step": 3897, "task_loss": 2.9089601039886475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8160773409800414, "compression/movement_sparsity/importance_threshold": -0.0013082599287231132, "compression/movement_sparsity/linear_layer_sparsity": 0.805251882778373, "compression/movement_sparsity/model_sparsity": 0.7775890033603906, "compression_loss": 87.10447692871094, "distillation_loss": 4.575587272644043, "epoch": 3.29, "learning_rate": 3.7249929557621864e-05, "loss": 91.1963, "step": 3898, "task_loss": 1.1889830827713013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8163503582422547, "compression/movement_sparsity/importance_threshold": -0.0013063179301357362, "compression/movement_sparsity/linear_layer_sparsity": 0.8055109353202622, "compression/movement_sparsity/model_sparsity": 0.777839156650527, "compression_loss": 87.13310241699219, "distillation_loss": 3.5662457942962646, "epoch": 3.3, "learning_rate": 3.724523339907956e-05, "loss": 90.4496, "step": 3899, "task_loss": 1.9819282293319702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8166231051903688, "compression/movement_sparsity/importance_threshold": -0.0013043778543190823, "compression/movement_sparsity/linear_layer_sparsity": 0.8057094965597346, "compression/movement_sparsity/model_sparsity": 0.7780308967005777, "compression_loss": 87.16172790527344, "distillation_loss": 3.3208515644073486, "epoch": 3.3, "learning_rate": 3.724053724053724e-05, "loss": 90.8988, "step": 3900, "task_loss": 3.131301164627075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8168955819582693, "compression/movement_sparsity/importance_threshold": -0.0013024397003208116, "compression/movement_sparsity/linear_layer_sparsity": 0.8059522606886331, "compression/movement_sparsity/model_sparsity": 0.7782653211348187, "compression_loss": 87.1902847290039, "distillation_loss": 4.476731777191162, "epoch": 3.3, "learning_rate": 3.723584108199493e-05, "loss": 90.9247, "step": 3901, "task_loss": 2.8104166984558105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8171677886798413, "compression/movement_sparsity/importance_threshold": -0.0013005034671885844, "compression/movement_sparsity/linear_layer_sparsity": 0.8059940668203645, "compression/movement_sparsity/model_sparsity": 0.7783056910973144, "compression_loss": 87.21881866455078, "distillation_loss": 3.796083927154541, "epoch": 3.3, "learning_rate": 3.7231144923452616e-05, "loss": 91.0536, "step": 3902, "task_loss": 2.7004988193511963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8174397254889696, "compression/movement_sparsity/importance_threshold": -0.0012985691539700661, "compression/movement_sparsity/linear_layer_sparsity": 0.8063936695261774, "compression/movement_sparsity/model_sparsity": 0.7786915662208611, "compression_loss": 87.24732971191406, "distillation_loss": 2.9230597019195557, "epoch": 3.3, "learning_rate": 3.72264487649103e-05, "loss": 91.3207, "step": 3903, "task_loss": 1.51861572265625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8177113925195397, "compression/movement_sparsity/importance_threshold": -0.0012966367597129153, "compression/movement_sparsity/linear_layer_sparsity": 0.8066624879613603, "compression/movement_sparsity/model_sparsity": 0.7789511499158132, "compression_loss": 87.27582550048828, "distillation_loss": 3.3723082542419434, "epoch": 3.3, "learning_rate": 3.7221752606367995e-05, "loss": 91.0461, "step": 3904, "task_loss": 3.0612215995788574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8179827899054365, "compression/movement_sparsity/importance_threshold": -0.0012947062834647964, "compression/movement_sparsity/linear_layer_sparsity": 0.8069320576191044, "compression/movement_sparsity/model_sparsity": 0.7792114590265203, "compression_loss": 87.30429077148438, "distillation_loss": 4.127017974853516, "epoch": 3.3, "learning_rate": 3.721705644782568e-05, "loss": 91.2371, "step": 3905, "task_loss": 1.9191899299621582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8182539177805455, "compression/movement_sparsity/importance_threshold": -0.001292777724273368, "compression/movement_sparsity/linear_layer_sparsity": 0.8070789395160429, "compression/movement_sparsity/model_sparsity": 0.7793532950784375, "compression_loss": 87.33273315429688, "distillation_loss": 4.240268707275391, "epoch": 3.3, "learning_rate": 3.721236028928337e-05, "loss": 91.7431, "step": 3906, "task_loss": 1.8947890996932983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8185247762787512, "compression/movement_sparsity/importance_threshold": -0.0012908510811862955, "compression/movement_sparsity/linear_layer_sparsity": 0.8072707755249686, "compression/movement_sparsity/model_sparsity": 0.7795385409303001, "compression_loss": 87.361083984375, "distillation_loss": 3.0999388694763184, "epoch": 3.3, "learning_rate": 3.7207664130741054e-05, "loss": 91.165, "step": 3907, "task_loss": 1.8304029703140259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8187953655339393, "compression/movement_sparsity/importance_threshold": -0.0012889263532512392, "compression/movement_sparsity/linear_layer_sparsity": 0.8075115960145425, "compression/movement_sparsity/model_sparsity": 0.7797710884952066, "compression_loss": 87.3895492553711, "distillation_loss": 3.282078504562378, "epoch": 3.3, "learning_rate": 3.720296797219874e-05, "loss": 91.3489, "step": 3908, "task_loss": 1.6398612260818481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8190656856799947, "compression/movement_sparsity/importance_threshold": -0.001287003539515861, "compression/movement_sparsity/linear_layer_sparsity": 0.8077509021348267, "compression/movement_sparsity/model_sparsity": 0.7800021737140673, "compression_loss": 87.41792297363281, "distillation_loss": 3.4297962188720703, "epoch": 3.3, "learning_rate": 3.7198271813656434e-05, "loss": 91.7582, "step": 3909, "task_loss": 2.474107503890991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8193357368508025, "compression/movement_sparsity/importance_threshold": -0.0012850826390278227, "compression/movement_sparsity/linear_layer_sparsity": 0.8079497018576518, "compression/movement_sparsity/model_sparsity": 0.7801941440548339, "compression_loss": 87.44625091552734, "distillation_loss": 4.749983787536621, "epoch": 3.3, "learning_rate": 3.7193575655114113e-05, "loss": 92.0042, "step": 3910, "task_loss": 2.8107259273529053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8196055191802482, "compression/movement_sparsity/importance_threshold": -0.0012831636508347857, "compression/movement_sparsity/linear_layer_sparsity": 0.8081543801951213, "compression/movement_sparsity/model_sparsity": 0.7803917910617472, "compression_loss": 87.47459411621094, "distillation_loss": 4.207409381866455, "epoch": 3.31, "learning_rate": 3.7188879496571807e-05, "loss": 91.2615, "step": 3911, "task_loss": 1.4172428846359253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8198750328022164, "compression/movement_sparsity/importance_threshold": -0.0012812465739844136, "compression/movement_sparsity/linear_layer_sparsity": 0.8084739121152595, "compression/movement_sparsity/model_sparsity": 0.7807003460774334, "compression_loss": 87.50286102294922, "distillation_loss": 2.8731470108032227, "epoch": 3.31, "learning_rate": 3.718418333802949e-05, "loss": 91.1768, "step": 3912, "task_loss": 1.4279749393463135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8201442778505927, "compression/movement_sparsity/importance_threshold": -0.0012793314075243657, "compression/movement_sparsity/linear_layer_sparsity": 0.8086649611291213, "compression/movement_sparsity/model_sparsity": 0.7808848319699336, "compression_loss": 87.53105163574219, "distillation_loss": 3.9574456214904785, "epoch": 3.31, "learning_rate": 3.717948717948718e-05, "loss": 91.1165, "step": 3913, "task_loss": 2.157353401184082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8204132544592619, "compression/movement_sparsity/importance_threshold": -0.0012774181505023066, "compression/movement_sparsity/linear_layer_sparsity": 0.8089528701566893, "compression/movement_sparsity/model_sparsity": 0.7811628504366928, "compression_loss": 87.55925750732422, "distillation_loss": 2.8792953491210938, "epoch": 3.31, "learning_rate": 3.717479102094487e-05, "loss": 91.0415, "step": 3914, "task_loss": 1.5656218528747559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8206819627621096, "compression/movement_sparsity/importance_threshold": -0.0012755068019658948, "compression/movement_sparsity/linear_layer_sparsity": 0.8090798983145141, "compression/movement_sparsity/model_sparsity": 0.7812855147865121, "compression_loss": 87.58740234375, "distillation_loss": 3.9262967109680176, "epoch": 3.31, "learning_rate": 3.717009486240255e-05, "loss": 91.4299, "step": 3915, "task_loss": 2.3389925956726074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8209504028930203, "compression/movement_sparsity/importance_threshold": -0.0012735973609627948, "compression/movement_sparsity/linear_layer_sparsity": 0.8092535141952923, "compression/movement_sparsity/model_sparsity": 0.7814531664276806, "compression_loss": 87.61558532714844, "distillation_loss": 4.660613536834717, "epoch": 3.31, "learning_rate": 3.7165398703860245e-05, "loss": 91.6911, "step": 3916, "task_loss": 3.2148261070251465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8212185749858798, "compression/movement_sparsity/importance_threshold": -0.0012716898265406678, "compression/movement_sparsity/linear_layer_sparsity": 0.8094525404773025, "compression/movement_sparsity/model_sparsity": 0.7816453555446272, "compression_loss": 87.6436538696289, "distillation_loss": 3.893796920776367, "epoch": 3.31, "learning_rate": 3.716070254531793e-05, "loss": 91.3043, "step": 3917, "task_loss": 1.5712782144546509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8214864791745726, "compression/movement_sparsity/importance_threshold": -0.0012697841977471766, "compression/movement_sparsity/linear_layer_sparsity": 0.8096233661028539, "compression/movement_sparsity/model_sparsity": 0.7818103127844198, "compression_loss": 87.67173767089844, "distillation_loss": 3.9875268936157227, "epoch": 3.31, "learning_rate": 3.7156006386775624e-05, "loss": 91.0906, "step": 3918, "task_loss": 1.6825100183486938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8217541155929845, "compression/movement_sparsity/importance_threshold": -0.0012678804736299797, "compression/movement_sparsity/linear_layer_sparsity": 0.8098350081542228, "compression/movement_sparsity/model_sparsity": 0.7820146842802371, "compression_loss": 87.69979095458984, "distillation_loss": 5.328897953033447, "epoch": 3.31, "learning_rate": 3.7151310228233304e-05, "loss": 92.1222, "step": 3919, "task_loss": 3.1344170570373535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.822021484375, "compression/movement_sparsity/importance_threshold": -0.0012659786532367434, "compression/movement_sparsity/linear_layer_sparsity": 0.8101317529900088, "compression/movement_sparsity/model_sparsity": 0.7823012350180201, "compression_loss": 87.72774505615234, "distillation_loss": 3.5134711265563965, "epoch": 3.31, "learning_rate": 3.714661406969099e-05, "loss": 91.8625, "step": 3920, "task_loss": 1.9426709413528442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8222885856545046, "compression/movement_sparsity/importance_threshold": -0.001264078735615127, "compression/movement_sparsity/linear_layer_sparsity": 0.810423847400417, "compression/movement_sparsity/model_sparsity": 0.7825832950868431, "compression_loss": 87.75576782226562, "distillation_loss": 3.539323329925537, "epoch": 3.31, "learning_rate": 3.7141917911148683e-05, "loss": 91.7454, "step": 3921, "task_loss": 1.5118719339370728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8225554195653835, "compression/movement_sparsity/importance_threshold": -0.0012621807198127926, "compression/movement_sparsity/linear_layer_sparsity": 0.8104930671935432, "compression/movement_sparsity/model_sparsity": 0.7826501369671305, "compression_loss": 87.78372955322266, "distillation_loss": 3.920248031616211, "epoch": 3.32, "learning_rate": 3.713722175260637e-05, "loss": 91.5099, "step": 3922, "task_loss": 1.8925237655639648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8228219862415216, "compression/movement_sparsity/importance_threshold": -0.001260284604877402, "compression/movement_sparsity/linear_layer_sparsity": 0.8106440748524837, "compression/movement_sparsity/model_sparsity": 0.7827959570484325, "compression_loss": 87.8116226196289, "distillation_loss": 5.789847373962402, "epoch": 3.32, "learning_rate": 3.7132525594064056e-05, "loss": 92.7169, "step": 3923, "task_loss": 2.7496049404144287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8230882858168044, "compression/movement_sparsity/importance_threshold": -0.0012583903898566165, "compression/movement_sparsity/linear_layer_sparsity": 0.8110392059954333, "compression/movement_sparsity/model_sparsity": 0.7831775142210562, "compression_loss": 87.83960723876953, "distillation_loss": 2.9214227199554443, "epoch": 3.32, "learning_rate": 3.712782943552174e-05, "loss": 91.8318, "step": 3924, "task_loss": 2.2500569820404053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8233543184251164, "compression/movement_sparsity/importance_threshold": -0.0012564980737981013, "compression/movement_sparsity/linear_layer_sparsity": 0.8112679711515274, "compression/movement_sparsity/model_sparsity": 0.7833984205902745, "compression_loss": 87.86744689941406, "distillation_loss": 3.5704662799835205, "epoch": 3.32, "learning_rate": 3.7123133276979436e-05, "loss": 91.7502, "step": 3925, "task_loss": 2.308192491531372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8236200842003436, "compression/movement_sparsity/importance_threshold": -0.0012546076557495124, "compression/movement_sparsity/linear_layer_sparsity": 0.8114237007808517, "compression/movement_sparsity/model_sparsity": 0.7835488004277512, "compression_loss": 87.89535522460938, "distillation_loss": 5.481163024902344, "epoch": 3.32, "learning_rate": 3.711843711843712e-05, "loss": 92.4619, "step": 3926, "task_loss": 2.412649154663086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8238855832763703, "compression/movement_sparsity/importance_threshold": -0.001252719134758518, "compression/movement_sparsity/linear_layer_sparsity": 0.8116926980785492, "compression/movement_sparsity/model_sparsity": 0.7838085568407402, "compression_loss": 87.9232177734375, "distillation_loss": 2.259784698486328, "epoch": 3.32, "learning_rate": 3.711374095989481e-05, "loss": 91.1581, "step": 3927, "task_loss": 0.9908421039581299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8241508157870824, "compression/movement_sparsity/importance_threshold": -0.0012508325098727737, "compression/movement_sparsity/linear_layer_sparsity": 0.8119025634289404, "compression/movement_sparsity/model_sparsity": 0.7840112126707242, "compression_loss": 87.95098876953125, "distillation_loss": 4.173953533172607, "epoch": 3.32, "learning_rate": 3.7109044801352495e-05, "loss": 92.2321, "step": 3928, "task_loss": 3.2770626544952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8244157818663643, "compression/movement_sparsity/importance_threshold": -0.0012489477801399477, "compression/movement_sparsity/linear_layer_sparsity": 0.8121899477931322, "compression/movement_sparsity/model_sparsity": 0.7842887244979084, "compression_loss": 87.97876739501953, "distillation_loss": 4.336331844329834, "epoch": 3.32, "learning_rate": 3.710434864281018e-05, "loss": 92.4408, "step": 3929, "task_loss": 2.1479907035827637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8246804816481018, "compression/movement_sparsity/importance_threshold": -0.0012470649446076959, "compression/movement_sparsity/linear_layer_sparsity": 0.8124594339817028, "compression/movement_sparsity/model_sparsity": 0.784548953006865, "compression_loss": 88.00651550292969, "distillation_loss": 3.396235466003418, "epoch": 3.32, "learning_rate": 3.7099652484267874e-05, "loss": 91.7278, "step": 3930, "task_loss": 1.754989743232727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8249449152661795, "compression/movement_sparsity/importance_threshold": -0.0012451840023236854, "compression/movement_sparsity/linear_layer_sparsity": 0.8127335228989807, "compression/movement_sparsity/model_sparsity": 0.7848136261266383, "compression_loss": 88.03423309326172, "distillation_loss": 2.7928194999694824, "epoch": 3.32, "learning_rate": 3.709495632572556e-05, "loss": 91.3861, "step": 3931, "task_loss": 1.8501750230789185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8252090828544829, "compression/movement_sparsity/importance_threshold": -0.0012433049523355747, "compression/movement_sparsity/linear_layer_sparsity": 0.8129673081296495, "compression/movement_sparsity/model_sparsity": 0.7850393801154261, "compression_loss": 88.06192016601562, "distillation_loss": 3.955631732940674, "epoch": 3.32, "learning_rate": 3.709026016718325e-05, "loss": 92.2217, "step": 3932, "task_loss": 1.2766752243041992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8254729845468971, "compression/movement_sparsity/importance_threshold": -0.001241427793691026, "compression/movement_sparsity/linear_layer_sparsity": 0.8131442747015333, "compression/movement_sparsity/model_sparsity": 0.7852102673411528, "compression_loss": 88.0895767211914, "distillation_loss": 3.7308123111724854, "epoch": 3.32, "learning_rate": 3.708556400864093e-05, "loss": 91.7448, "step": 3933, "task_loss": 1.4670004844665527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8257366204773071, "compression/movement_sparsity/importance_threshold": -0.0012395525254377017, "compression/movement_sparsity/linear_layer_sparsity": 0.8133981283063332, "compression/movement_sparsity/model_sparsity": 0.7854554002936829, "compression_loss": 88.1171875, "distillation_loss": 3.516702651977539, "epoch": 3.33, "learning_rate": 3.708086785009862e-05, "loss": 92.4058, "step": 3934, "task_loss": 1.8422495126724243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8259999907795982, "compression/movement_sparsity/importance_threshold": -0.001237679146623264, "compression/movement_sparsity/linear_layer_sparsity": 0.8137202000741779, "compression/movement_sparsity/model_sparsity": 0.7857664079054933, "compression_loss": 88.14482116699219, "distillation_loss": 3.85758113861084, "epoch": 3.33, "learning_rate": 3.707617169155631e-05, "loss": 92.2631, "step": 3935, "task_loss": 2.502875566482544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8262630955876553, "compression/movement_sparsity/importance_threshold": -0.0012358076562953742, "compression/movement_sparsity/linear_layer_sparsity": 0.8139153867742093, "compression/movement_sparsity/model_sparsity": 0.7859548893419143, "compression_loss": 88.17237854003906, "distillation_loss": 3.2642111778259277, "epoch": 3.33, "learning_rate": 3.707147553301399e-05, "loss": 91.9552, "step": 3936, "task_loss": 2.3661341667175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8265259350353639, "compression/movement_sparsity/importance_threshold": -0.001233938053501694, "compression/movement_sparsity/linear_layer_sparsity": 0.8142119765958161, "compression/movement_sparsity/model_sparsity": 0.7862412903907319, "compression_loss": 88.199951171875, "distillation_loss": 2.9768433570861816, "epoch": 3.33, "learning_rate": 3.7066779374471685e-05, "loss": 92.5669, "step": 3937, "task_loss": 1.6614127159118652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8267885092566086, "compression/movement_sparsity/importance_threshold": -0.001232070337289888, "compression/movement_sparsity/linear_layer_sparsity": 0.8143450503066324, "compression/movement_sparsity/model_sparsity": 0.7863697926101989, "compression_loss": 88.22745513916016, "distillation_loss": 3.5381999015808105, "epoch": 3.33, "learning_rate": 3.706208321592937e-05, "loss": 91.9557, "step": 3938, "task_loss": 1.865813970565796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8270508183852753, "compression/movement_sparsity/importance_threshold": -0.001230204506707612, "compression/movement_sparsity/linear_layer_sparsity": 0.8145182846140462, "compression/movement_sparsity/model_sparsity": 0.786537075786222, "compression_loss": 88.25495147705078, "distillation_loss": 4.0775861740112305, "epoch": 3.33, "learning_rate": 3.705738705738706e-05, "loss": 92.5053, "step": 3939, "task_loss": 1.951248288154602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8273128625552484, "compression/movement_sparsity/importance_threshold": -0.0012283405608025343, "compression/movement_sparsity/linear_layer_sparsity": 0.8147943052464811, "compression/movement_sparsity/model_sparsity": 0.786803614260794, "compression_loss": 88.2824478149414, "distillation_loss": 4.029210090637207, "epoch": 3.33, "learning_rate": 3.7052690898844744e-05, "loss": 92.3654, "step": 3940, "task_loss": 1.780691146850586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8275746419004133, "compression/movement_sparsity/importance_threshold": -0.0012264784986223132, "compression/movement_sparsity/linear_layer_sparsity": 0.8149797856740569, "compression/movement_sparsity/model_sparsity": 0.7869827228650781, "compression_loss": 88.30989074707031, "distillation_loss": 4.2074503898620605, "epoch": 3.33, "learning_rate": 3.704799474030243e-05, "loss": 92.2405, "step": 3941, "task_loss": 2.186603546142578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8278361565546555, "compression/movement_sparsity/importance_threshold": -0.0012246183192146107, "compression/movement_sparsity/linear_layer_sparsity": 0.8152324826345961, "compression/movement_sparsity/model_sparsity": 0.7872267389076361, "compression_loss": 88.33731842041016, "distillation_loss": 4.235222339630127, "epoch": 3.33, "learning_rate": 3.7043298581760124e-05, "loss": 91.6213, "step": 3942, "task_loss": 2.908998727798462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8280974066518596, "compression/movement_sparsity/importance_threshold": -0.0012227600216270897, "compression/movement_sparsity/linear_layer_sparsity": 0.8154358970102964, "compression/movement_sparsity/model_sparsity": 0.7874231653737551, "compression_loss": 88.36467742919922, "distillation_loss": 3.7434661388397217, "epoch": 3.33, "learning_rate": 3.703860242321781e-05, "loss": 91.596, "step": 3943, "task_loss": 1.8157528638839722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8283583923259111, "compression/movement_sparsity/importance_threshold": -0.0012209036049074112, "compression/movement_sparsity/linear_layer_sparsity": 0.8157539145611449, "compression/movement_sparsity/model_sparsity": 0.7877302580433955, "compression_loss": 88.39205932617188, "distillation_loss": 3.402312755584717, "epoch": 3.33, "learning_rate": 3.7033906264675496e-05, "loss": 91.5434, "step": 3944, "task_loss": 2.9084417819976807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.828619113710695, "compression/movement_sparsity/importance_threshold": -0.0012190490681032382, "compression/movement_sparsity/linear_layer_sparsity": 0.8159932326055966, "compression/movement_sparsity/model_sparsity": 0.787961354776792, "compression_loss": 88.41934967041016, "distillation_loss": 4.678489685058594, "epoch": 3.33, "learning_rate": 3.702921010613318e-05, "loss": 92.9037, "step": 3945, "task_loss": 2.272563934326172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8288795709400965, "compression/movement_sparsity/importance_threshold": -0.0012171964102622307, "compression/movement_sparsity/linear_layer_sparsity": 0.8162278167554969, "compression/movement_sparsity/model_sparsity": 0.788187880239478, "compression_loss": 88.44664764404297, "distillation_loss": 3.485626697540283, "epoch": 3.34, "learning_rate": 3.702451394759087e-05, "loss": 92.4483, "step": 3946, "task_loss": 2.155290126800537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8291397641480005, "compression/movement_sparsity/importance_threshold": -0.0012153456304320534, "compression/movement_sparsity/linear_layer_sparsity": 0.816370882918792, "compression/movement_sparsity/model_sparsity": 0.7883260316399409, "compression_loss": 88.4738998413086, "distillation_loss": 3.5954575538635254, "epoch": 3.34, "learning_rate": 3.701981778904856e-05, "loss": 92.6638, "step": 3947, "task_loss": 1.4485290050506592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8293996934682926, "compression/movement_sparsity/importance_threshold": -0.0012134967276603648, "compression/movement_sparsity/linear_layer_sparsity": 0.8165057571789213, "compression/movement_sparsity/model_sparsity": 0.7884562725543128, "compression_loss": 88.50118255615234, "distillation_loss": 2.490757703781128, "epoch": 3.34, "learning_rate": 3.701512163050625e-05, "loss": 93.0258, "step": 3948, "task_loss": 1.4213200807571411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8296593590348574, "compression/movement_sparsity/importance_threshold": -0.0012116497009948294, "compression/movement_sparsity/linear_layer_sparsity": 0.8166949340984643, "compression/movement_sparsity/model_sparsity": 0.7886389506646933, "compression_loss": 88.52832794189453, "distillation_loss": 4.205198287963867, "epoch": 3.34, "learning_rate": 3.7010425471963935e-05, "loss": 92.5215, "step": 3949, "task_loss": 1.6041842699050903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8299187609815805, "compression/movement_sparsity/importance_threshold": -0.0012098045494831084, "compression/movement_sparsity/linear_layer_sparsity": 0.8168971202848979, "compression/movement_sparsity/model_sparsity": 0.7888341911336255, "compression_loss": 88.55549621582031, "distillation_loss": 4.362962245941162, "epoch": 3.34, "learning_rate": 3.700572931342162e-05, "loss": 92.3629, "step": 3950, "task_loss": 1.3743550777435303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8301778994423469, "compression/movement_sparsity/importance_threshold": -0.001207961272172861, "compression/movement_sparsity/linear_layer_sparsity": 0.8170929031933112, "compression/movement_sparsity/model_sparsity": 0.7890232482968361, "compression_loss": 88.5826416015625, "distillation_loss": 3.158064365386963, "epoch": 3.34, "learning_rate": 3.7001033154879314e-05, "loss": 91.1871, "step": 3951, "task_loss": 1.5358991622924805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8304367745510416, "compression/movement_sparsity/importance_threshold": -0.0012061198681117538, "compression/movement_sparsity/linear_layer_sparsity": 0.8173496185983437, "compression/movement_sparsity/model_sparsity": 0.7892711447379568, "compression_loss": 88.60972595214844, "distillation_loss": 5.467584133148193, "epoch": 3.34, "learning_rate": 3.6996336996337e-05, "loss": 93.266, "step": 3952, "task_loss": 2.9303481578826904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8306953864415499, "compression/movement_sparsity/importance_threshold": -0.001204280336347444, "compression/movement_sparsity/linear_layer_sparsity": 0.8175351705709253, "compression/movement_sparsity/model_sparsity": 0.7894503224294556, "compression_loss": 88.63675689697266, "distillation_loss": 3.524061679840088, "epoch": 3.34, "learning_rate": 3.699164083779468e-05, "loss": 92.5139, "step": 3953, "task_loss": 1.514470100402832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8309537352477568, "compression/movement_sparsity/importance_threshold": -0.0012024426759275975, "compression/movement_sparsity/linear_layer_sparsity": 0.8177672029489516, "compression/movement_sparsity/model_sparsity": 0.7896743837814816, "compression_loss": 88.66378784179688, "distillation_loss": 4.488197326660156, "epoch": 3.34, "learning_rate": 3.698694467925237e-05, "loss": 92.6558, "step": 3954, "task_loss": 1.5625348091125488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8312118211035477, "compression/movement_sparsity/importance_threshold": -0.0012006068858998733, "compression/movement_sparsity/linear_layer_sparsity": 0.8180317167596179, "compression/movement_sparsity/model_sparsity": 0.7899298107290118, "compression_loss": 88.69076538085938, "distillation_loss": 4.302875518798828, "epoch": 3.34, "learning_rate": 3.698224852071006e-05, "loss": 92.2912, "step": 3955, "task_loss": 1.7860223054885864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8314696441428073, "compression/movement_sparsity/importance_threshold": -0.0011987729653119353, "compression/movement_sparsity/linear_layer_sparsity": 0.8182689838472363, "compression/movement_sparsity/model_sparsity": 0.7901589269622517, "compression_loss": 88.71772766113281, "distillation_loss": 3.443291664123535, "epoch": 3.34, "learning_rate": 3.697755236216775e-05, "loss": 92.1619, "step": 3956, "task_loss": 1.8134859800338745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8317272044994214, "compression/movement_sparsity/importance_threshold": -0.0011969409132114419, "compression/movement_sparsity/linear_layer_sparsity": 0.8185505134451191, "compression/movement_sparsity/model_sparsity": 0.7904307851523608, "compression_loss": 88.74467468261719, "distillation_loss": 3.753748655319214, "epoch": 3.34, "learning_rate": 3.697285620362544e-05, "loss": 92.2644, "step": 3957, "task_loss": 1.581174373626709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8319845023072744, "compression/movement_sparsity/importance_threshold": -0.0011951107286460603, "compression/movement_sparsity/linear_layer_sparsity": 0.8186716152916289, "compression/movement_sparsity/model_sparsity": 0.7905477267778901, "compression_loss": 88.77157592773438, "distillation_loss": 3.642509937286377, "epoch": 3.35, "learning_rate": 3.6968160045083125e-05, "loss": 93.2085, "step": 3958, "task_loss": 1.8202385902404785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.832241537700252, "compression/movement_sparsity/importance_threshold": -0.0011932824106634472, "compression/movement_sparsity/linear_layer_sparsity": 0.8188674220483774, "compression/movement_sparsity/model_sparsity": 0.7907368069701722, "compression_loss": 88.79847717285156, "distillation_loss": 5.0958356857299805, "epoch": 3.35, "learning_rate": 3.696346388654081e-05, "loss": 93.3075, "step": 3959, "task_loss": 2.3871350288391113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8324983108122389, "compression/movement_sparsity/importance_threshold": -0.0011914559583112681, "compression/movement_sparsity/linear_layer_sparsity": 0.8190670922354399, "compression/movement_sparsity/model_sparsity": 0.7909296178720518, "compression_loss": 88.82534790039062, "distillation_loss": 4.432785987854004, "epoch": 3.35, "learning_rate": 3.69587677279985e-05, "loss": 93.6683, "step": 3960, "task_loss": 1.8968230485916138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8327548217771207, "compression/movement_sparsity/importance_threshold": -0.0011896313706371824, "compression/movement_sparsity/linear_layer_sparsity": 0.8192187676477681, "compression/movement_sparsity/model_sparsity": 0.7910760827673584, "compression_loss": 88.85216522216797, "distillation_loss": 4.3654890060424805, "epoch": 3.35, "learning_rate": 3.695407156945619e-05, "loss": 92.8433, "step": 3961, "task_loss": 2.596330165863037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8330110707287821, "compression/movement_sparsity/importance_threshold": -0.0011878086466888538, "compression/movement_sparsity/linear_layer_sparsity": 0.8194126069168567, "compression/movement_sparsity/model_sparsity": 0.7912632630612344, "compression_loss": 88.87895202636719, "distillation_loss": 4.008730888366699, "epoch": 3.35, "learning_rate": 3.694937541091387e-05, "loss": 93.0534, "step": 3962, "task_loss": 2.976942777633667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8332670578011085, "compression/movement_sparsity/importance_threshold": -0.0011859877855139424, "compression/movement_sparsity/linear_layer_sparsity": 0.819508644162996, "compression/movement_sparsity/model_sparsity": 0.7913560011325237, "compression_loss": 88.90576171875, "distillation_loss": 2.971559524536133, "epoch": 3.35, "learning_rate": 3.6944679252371564e-05, "loss": 92.2263, "step": 3963, "task_loss": 1.6823859214782715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8335227831279851, "compression/movement_sparsity/importance_threshold": -0.0011841687861601112, "compression/movement_sparsity/linear_layer_sparsity": 0.8196706339803291, "compression/movement_sparsity/model_sparsity": 0.7915124261012925, "compression_loss": 88.9324951171875, "distillation_loss": 4.766303062438965, "epoch": 3.35, "learning_rate": 3.693998309382925e-05, "loss": 93.4035, "step": 3964, "task_loss": 3.60536789894104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8337782468432968, "compression/movement_sparsity/importance_threshold": -0.0011823516476750229, "compression/movement_sparsity/linear_layer_sparsity": 0.8198825264392184, "compression/movement_sparsity/model_sparsity": 0.7917170394023615, "compression_loss": 88.95922088623047, "distillation_loss": 4.010738372802734, "epoch": 3.35, "learning_rate": 3.6935286935286937e-05, "loss": 92.9386, "step": 3965, "task_loss": 2.2383551597595215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.834033449080929, "compression/movement_sparsity/importance_threshold": -0.001180536369106337, "compression/movement_sparsity/linear_layer_sparsity": 0.8200597076461197, "compression/movement_sparsity/model_sparsity": 0.7918881338897326, "compression_loss": 88.98590087890625, "distillation_loss": 3.020983934402466, "epoch": 3.35, "learning_rate": 3.693059077674462e-05, "loss": 92.6175, "step": 3966, "task_loss": 2.4204883575439453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8342883899747666, "compression/movement_sparsity/importance_threshold": -0.001178722949501717, "compression/movement_sparsity/linear_layer_sparsity": 0.8202054686713005, "compression/movement_sparsity/model_sparsity": 0.7920288875752851, "compression_loss": 89.01261901855469, "distillation_loss": 4.074035167694092, "epoch": 3.35, "learning_rate": 3.692589461820231e-05, "loss": 93.3814, "step": 3967, "task_loss": 2.4954898357391357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8345430696586951, "compression/movement_sparsity/importance_threshold": -0.0011769113879088232, "compression/movement_sparsity/linear_layer_sparsity": 0.8203204057231426, "compression/movement_sparsity/model_sparsity": 0.7921398761858086, "compression_loss": 89.03929138183594, "distillation_loss": 4.5692596435546875, "epoch": 3.35, "learning_rate": 3.692119845966e-05, "loss": 93.0013, "step": 3968, "task_loss": 2.4555137157440186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.834797488266599, "compression/movement_sparsity/importance_threshold": -0.0011751016833753203, "compression/movement_sparsity/linear_layer_sparsity": 0.8206735757201815, "compression/movement_sparsity/model_sparsity": 0.7924809137069714, "compression_loss": 89.06587219238281, "distillation_loss": 5.041815757751465, "epoch": 3.35, "learning_rate": 3.691650230111769e-05, "loss": 93.0914, "step": 3969, "task_loss": 3.7404656410217285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8350516459323641, "compression/movement_sparsity/importance_threshold": -0.0011732938349488676, "compression/movement_sparsity/linear_layer_sparsity": 0.8209328786695911, "compression/movement_sparsity/model_sparsity": 0.7927313088023594, "compression_loss": 89.09248352050781, "distillation_loss": 3.642277240753174, "epoch": 3.36, "learning_rate": 3.6911806142575375e-05, "loss": 93.4213, "step": 3970, "task_loss": 0.907159149646759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8353055427898752, "compression/movement_sparsity/importance_threshold": -0.0011714878416771279, "compression/movement_sparsity/linear_layer_sparsity": 0.8212132277748778, "compression/movement_sparsity/model_sparsity": 0.7930020270534249, "compression_loss": 89.11905670166016, "distillation_loss": 5.883290767669678, "epoch": 3.36, "learning_rate": 3.690710998403306e-05, "loss": 93.512, "step": 3971, "task_loss": 2.9608163833618164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8355591789730175, "compression/movement_sparsity/importance_threshold": -0.001169683702607764, "compression/movement_sparsity/linear_layer_sparsity": 0.8214228546419163, "compression/movement_sparsity/model_sparsity": 0.7932044525926929, "compression_loss": 89.14557647705078, "distillation_loss": 5.2603373527526855, "epoch": 3.36, "learning_rate": 3.690241382549075e-05, "loss": 93.1825, "step": 3972, "task_loss": 3.147740125656128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8358125546156762, "compression/movement_sparsity/importance_threshold": -0.0011678814167884353, "compression/movement_sparsity/linear_layer_sparsity": 0.8215896260504715, "compression/movement_sparsity/model_sparsity": 0.7933654948903154, "compression_loss": 89.172119140625, "distillation_loss": 5.386739730834961, "epoch": 3.36, "learning_rate": 3.689771766694844e-05, "loss": 93.5564, "step": 3973, "task_loss": 2.6244308948516846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8360656698517362, "compression/movement_sparsity/importance_threshold": -0.0011660809832668064, "compression/movement_sparsity/linear_layer_sparsity": 0.821810318545076, "compression/movement_sparsity/model_sparsity": 0.7935786059188007, "compression_loss": 89.19866180419922, "distillation_loss": 4.671595573425293, "epoch": 3.36, "learning_rate": 3.689302150840613e-05, "loss": 93.3166, "step": 3974, "task_loss": 2.673093795776367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8363185248150832, "compression/movement_sparsity/importance_threshold": -0.0011642824010905358, "compression/movement_sparsity/linear_layer_sparsity": 0.8220017014356317, "compression/movement_sparsity/model_sparsity": 0.7937634142183031, "compression_loss": 89.22515869140625, "distillation_loss": 3.0290629863739014, "epoch": 3.36, "learning_rate": 3.6888325349863814e-05, "loss": 93.2463, "step": 3975, "task_loss": 2.518005609512329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8365711196396016, "compression/movement_sparsity/importance_threshold": -0.0011624856693072897, "compression/movement_sparsity/linear_layer_sparsity": 0.8222763865612913, "compression/movement_sparsity/model_sparsity": 0.7940286630648661, "compression_loss": 89.25163269042969, "distillation_loss": 4.65767240524292, "epoch": 3.36, "learning_rate": 3.68836291913215e-05, "loss": 92.7748, "step": 3976, "task_loss": 3.0243079662323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8368234544591773, "compression/movement_sparsity/importance_threshold": -0.001160690786964725, "compression/movement_sparsity/linear_layer_sparsity": 0.8224814822446282, "compression/movement_sparsity/model_sparsity": 0.7942267130805323, "compression_loss": 89.27803802490234, "distillation_loss": 3.3954200744628906, "epoch": 3.36, "learning_rate": 3.6878933032779186e-05, "loss": 93.1904, "step": 3977, "task_loss": 2.1713430881500244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8370755294076948, "compression/movement_sparsity/importance_threshold": -0.0011588977531105079, "compression/movement_sparsity/linear_layer_sparsity": 0.822756692033664, "compression/movement_sparsity/model_sparsity": 0.7944924685666702, "compression_loss": 89.30450439453125, "distillation_loss": 3.5398621559143066, "epoch": 3.36, "learning_rate": 3.687423687423688e-05, "loss": 93.7278, "step": 3978, "task_loss": 2.2671945095062256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8373273446190396, "compression/movement_sparsity/importance_threshold": -0.0011571065667922986, "compression/movement_sparsity/linear_layer_sparsity": 0.8230802781707983, "compression/movement_sparsity/model_sparsity": 0.7948049385245267, "compression_loss": 89.3308334350586, "distillation_loss": 4.400421619415283, "epoch": 3.36, "learning_rate": 3.686954071569456e-05, "loss": 92.9681, "step": 3979, "task_loss": 1.9874955415725708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8375789002270966, "compression/movement_sparsity/importance_threshold": -0.0011553172270577592, "compression/movement_sparsity/linear_layer_sparsity": 0.8232153551417773, "compression/movement_sparsity/model_sparsity": 0.7949353751860072, "compression_loss": 89.35721588134766, "distillation_loss": 3.9217567443847656, "epoch": 3.36, "learning_rate": 3.686484455715225e-05, "loss": 93.1225, "step": 3980, "task_loss": 1.7651503086090088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8378301963657511, "compression/movement_sparsity/importance_threshold": -0.0011535297329545507, "compression/movement_sparsity/linear_layer_sparsity": 0.8234402926400604, "compression/movement_sparsity/model_sparsity": 0.7951525853892355, "compression_loss": 89.38356018066406, "distillation_loss": 5.212862968444824, "epoch": 3.36, "learning_rate": 3.686014839860994e-05, "loss": 93.979, "step": 3981, "task_loss": 1.8452118635177612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8380812331688884, "compression/movement_sparsity/importance_threshold": -0.0011517440835303342, "compression/movement_sparsity/linear_layer_sparsity": 0.8235988125246115, "compression/movement_sparsity/model_sparsity": 0.7953056596280881, "compression_loss": 89.40982818603516, "distillation_loss": 5.361797332763672, "epoch": 3.37, "learning_rate": 3.685545224006763e-05, "loss": 93.5775, "step": 3982, "task_loss": 3.2236344814300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8383320107703933, "compression/movement_sparsity/importance_threshold": -0.0011499602778327744, "compression/movement_sparsity/linear_layer_sparsity": 0.8238242627621027, "compression/movement_sparsity/model_sparsity": 0.7955233649563554, "compression_loss": 89.43607330322266, "distillation_loss": 4.034379482269287, "epoch": 3.37, "learning_rate": 3.685075608152531e-05, "loss": 93.9679, "step": 3983, "task_loss": 1.7008881568908691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8385825293041512, "compression/movement_sparsity/importance_threshold": -0.0011481783149095313, "compression/movement_sparsity/linear_layer_sparsity": 0.824199170516742, "compression/movement_sparsity/model_sparsity": 0.7958853934762716, "compression_loss": 89.46229553222656, "distillation_loss": 4.5586724281311035, "epoch": 3.37, "learning_rate": 3.6846059922983e-05, "loss": 93.0366, "step": 3984, "task_loss": 3.5217268466949463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.838832788904047, "compression/movement_sparsity/importance_threshold": -0.001146398193808267, "compression/movement_sparsity/linear_layer_sparsity": 0.8242773930564332, "compression/movement_sparsity/model_sparsity": 0.7959609288310838, "compression_loss": 89.48844909667969, "distillation_loss": 4.441836357116699, "epoch": 3.37, "learning_rate": 3.684136376444069e-05, "loss": 93.2525, "step": 3985, "task_loss": 3.2548880577087402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8390827897039662, "compression/movement_sparsity/importance_threshold": -0.0011446199135766427, "compression/movement_sparsity/linear_layer_sparsity": 0.8244106217814287, "compression/movement_sparsity/model_sparsity": 0.7960895807395162, "compression_loss": 89.5145263671875, "distillation_loss": 4.475978851318359, "epoch": 3.37, "learning_rate": 3.683666760589838e-05, "loss": 93.4931, "step": 3986, "task_loss": 2.9700369834899902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8393325318377935, "compression/movement_sparsity/importance_threshold": -0.0011428434732623219, "compression/movement_sparsity/linear_layer_sparsity": 0.8245886257558969, "compression/movement_sparsity/model_sparsity": 0.7962614697298571, "compression_loss": 89.54061889648438, "distillation_loss": 4.675412178039551, "epoch": 3.37, "learning_rate": 3.683197144735606e-05, "loss": 94.4058, "step": 3987, "task_loss": 2.2065067291259766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8395820154394146, "compression/movement_sparsity/importance_threshold": -0.001141068871912964, "compression/movement_sparsity/linear_layer_sparsity": 0.8248648848716845, "compression/movement_sparsity/model_sparsity": 0.7965282384951451, "compression_loss": 89.56675720214844, "distillation_loss": 4.418222427368164, "epoch": 3.37, "learning_rate": 3.682727528881375e-05, "loss": 93.5082, "step": 3988, "task_loss": 1.5094621181488037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.839831240642714, "compression/movement_sparsity/importance_threshold": -0.0011392961085762345, "compression/movement_sparsity/linear_layer_sparsity": 0.8250841106936698, "compression/movement_sparsity/model_sparsity": 0.7967399332357277, "compression_loss": 89.59275817871094, "distillation_loss": 4.025981903076172, "epoch": 3.37, "learning_rate": 3.682257913027144e-05, "loss": 93.7071, "step": 3989, "task_loss": 2.2631874084472656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8400802075815771, "compression/movement_sparsity/importance_threshold": -0.0011375251822997928, "compression/movement_sparsity/linear_layer_sparsity": 0.8252304679272324, "compression/movement_sparsity/model_sparsity": 0.7968812626480699, "compression_loss": 89.61878204345703, "distillation_loss": 5.245976448059082, "epoch": 3.37, "learning_rate": 3.681788297172913e-05, "loss": 93.8565, "step": 3990, "task_loss": 2.999668598175049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8403289163898893, "compression/movement_sparsity/importance_threshold": -0.0011357560921313, "compression/movement_sparsity/linear_layer_sparsity": 0.8253618484062444, "compression/movement_sparsity/model_sparsity": 0.7970081298034541, "compression_loss": 89.64476013183594, "distillation_loss": 5.361252784729004, "epoch": 3.37, "learning_rate": 3.6813186813186815e-05, "loss": 94.2699, "step": 3991, "task_loss": 2.388672351837158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8405773672015353, "compression/movement_sparsity/importance_threshold": -0.0011339888371184196, "compression/movement_sparsity/linear_layer_sparsity": 0.8255765668928634, "compression/movement_sparsity/model_sparsity": 0.7972154720495064, "compression_loss": 89.6707763671875, "distillation_loss": 3.9773621559143066, "epoch": 3.37, "learning_rate": 3.68084906546445e-05, "loss": 93.6771, "step": 3992, "task_loss": 2.3177173137664795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8408255601504008, "compression/movement_sparsity/importance_threshold": -0.0011322234163088111, "compression/movement_sparsity/linear_layer_sparsity": 0.8257050617233074, "compression/movement_sparsity/model_sparsity": 0.7973395526872284, "compression_loss": 89.69667053222656, "distillation_loss": 6.498352527618408, "epoch": 3.38, "learning_rate": 3.680379449610219e-05, "loss": 93.9006, "step": 3993, "task_loss": 3.255223035812378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8410734953703702, "compression/movement_sparsity/importance_threshold": -0.0011304598287501409, "compression/movement_sparsity/linear_layer_sparsity": 0.8258779621540274, "compression/movement_sparsity/model_sparsity": 0.7975065134562492, "compression_loss": 89.7226333618164, "distillation_loss": 4.003569602966309, "epoch": 3.38, "learning_rate": 3.679909833755988e-05, "loss": 93.7773, "step": 3994, "task_loss": 1.8000563383102417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8413211729953295, "compression/movement_sparsity/importance_threshold": -0.0011286980734900648, "compression/movement_sparsity/linear_layer_sparsity": 0.8260522338640256, "compression/movement_sparsity/model_sparsity": 0.7976747983968865, "compression_loss": 89.7485122680664, "distillation_loss": 3.0761704444885254, "epoch": 3.38, "learning_rate": 3.679440217901757e-05, "loss": 94.0664, "step": 3995, "task_loss": 1.6352076530456543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.841568593159163, "compression/movement_sparsity/importance_threshold": -0.0011269381495762516, "compression/movement_sparsity/linear_layer_sparsity": 0.8263133969835862, "compression/movement_sparsity/model_sparsity": 0.7979269897598584, "compression_loss": 89.77439880371094, "distillation_loss": 5.268775939941406, "epoch": 3.38, "learning_rate": 3.6789706020475254e-05, "loss": 93.966, "step": 3996, "task_loss": 2.3639848232269287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8418157559957565, "compression/movement_sparsity/importance_threshold": -0.0011251800560563557, "compression/movement_sparsity/linear_layer_sparsity": 0.8265669763325305, "compression/movement_sparsity/model_sparsity": 0.7981718578780652, "compression_loss": 89.80018615722656, "distillation_loss": 5.026032447814941, "epoch": 3.38, "learning_rate": 3.678500986193294e-05, "loss": 93.8294, "step": 3997, "task_loss": 2.666954755783081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8420626616389947, "compression/movement_sparsity/importance_threshold": -0.0011234237919780458, "compression/movement_sparsity/linear_layer_sparsity": 0.826725484292914, "compression/movement_sparsity/model_sparsity": 0.798324920602382, "compression_loss": 89.82608032226562, "distillation_loss": 3.1120262145996094, "epoch": 3.38, "learning_rate": 3.6780313703390626e-05, "loss": 93.8439, "step": 3998, "task_loss": 2.209625244140625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.842309310222763, "compression/movement_sparsity/importance_threshold": -0.0011216693563889787, "compression/movement_sparsity/linear_layer_sparsity": 0.8269005787704791, "compression/movement_sparsity/model_sparsity": 0.798494000045989, "compression_loss": 89.85186004638672, "distillation_loss": 5.378230094909668, "epoch": 3.38, "learning_rate": 3.677561754484832e-05, "loss": 93.7036, "step": 3999, "task_loss": 2.092989921569824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8425557018809464, "compression/movement_sparsity/importance_threshold": -0.001119916748336819, "compression/movement_sparsity/linear_layer_sparsity": 0.8270956343046665, "compression/movement_sparsity/model_sparsity": 0.7986823548225161, "compression_loss": 89.877685546875, "distillation_loss": 5.172513008117676, "epoch": 3.38, "learning_rate": 3.6770921386306e-05, "loss": 93.5572, "step": 4000, "task_loss": 3.331024408340454 }, { "epoch": 3.38, "eval_accuracy": 0.5939405940594059, "eval_loss": 93.41417694091797, "eval_runtime": 227.5574, "eval_samples_per_second": 110.961, "eval_steps_per_second": 0.87, "step": 4000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8428018367474303, "compression/movement_sparsity/importance_threshold": -0.001118165966869227, "compression/movement_sparsity/linear_layer_sparsity": 0.8272089258213748, "compression/movement_sparsity/model_sparsity": 0.7987917544271, "compression_loss": 89.90338897705078, "distillation_loss": 4.414364337921143, "epoch": 3.38, "learning_rate": 3.676622522776369e-05, "loss": 93.2801, "step": 4001, "task_loss": 1.922366976737976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8430477149560993, "compression/movement_sparsity/importance_threshold": -0.0011164170110338672, "compression/movement_sparsity/linear_layer_sparsity": 0.8273998436693927, "compression/movement_sparsity/model_sparsity": 0.7989761136597064, "compression_loss": 89.92909240722656, "distillation_loss": 4.370852947235107, "epoch": 3.38, "learning_rate": 3.676152906922138e-05, "loss": 93.4978, "step": 4002, "task_loss": 3.1665468215942383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8432933366408389, "compression/movement_sparsity/importance_threshold": -0.0011146698798783989, "compression/movement_sparsity/linear_layer_sparsity": 0.8274942592287334, "compression/movement_sparsity/model_sparsity": 0.7990672857541277, "compression_loss": 89.95472717285156, "distillation_loss": 4.566342353820801, "epoch": 3.38, "learning_rate": 3.6756832910679065e-05, "loss": 93.8752, "step": 4003, "task_loss": 3.161533832550049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8435387019355345, "compression/movement_sparsity/importance_threshold": -0.0011129245724504832, "compression/movement_sparsity/linear_layer_sparsity": 0.8275967712978989, "compression/movement_sparsity/model_sparsity": 0.7991662762183533, "compression_loss": 89.98041534423828, "distillation_loss": 4.932188987731934, "epoch": 3.38, "learning_rate": 3.675213675213676e-05, "loss": 94.2149, "step": 4004, "task_loss": 2.009910821914673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8437838109740706, "compression/movement_sparsity/importance_threshold": -0.0011111810877977848, "compression/movement_sparsity/linear_layer_sparsity": 0.8277404217454082, "compression/movement_sparsity/model_sparsity": 0.7993049918310702, "compression_loss": 90.00611114501953, "distillation_loss": 4.273679256439209, "epoch": 3.39, "learning_rate": 3.674744059359444e-05, "loss": 93.8956, "step": 4005, "task_loss": 2.590000867843628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.844028663890333, "compression/movement_sparsity/importance_threshold": -0.001109439424967963, "compression/movement_sparsity/linear_layer_sparsity": 0.8279383987006664, "compression/movement_sparsity/model_sparsity": 0.7994961676688669, "compression_loss": 90.03173828125, "distillation_loss": 4.970494270324707, "epoch": 3.39, "learning_rate": 3.674274443505213e-05, "loss": 93.7747, "step": 4006, "task_loss": 2.0863406658172607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8442732608182064, "compression/movement_sparsity/importance_threshold": -0.0011076995830086814, "compression/movement_sparsity/linear_layer_sparsity": 0.8281199799254253, "compression/movement_sparsity/model_sparsity": 0.7996715110199462, "compression_loss": 90.05731964111328, "distillation_loss": 3.9369025230407715, "epoch": 3.39, "learning_rate": 3.673804827650982e-05, "loss": 94.0699, "step": 4007, "task_loss": 1.9288233518600464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8445176018915762, "compression/movement_sparsity/importance_threshold": -0.0011059615609676004, "compression/movement_sparsity/linear_layer_sparsity": 0.828335556952114, "compression/movement_sparsity/model_sparsity": 0.7998796823125757, "compression_loss": 90.0829086303711, "distillation_loss": 4.422567844390869, "epoch": 3.39, "learning_rate": 3.6733352117967503e-05, "loss": 95.1315, "step": 4008, "task_loss": 2.685410499572754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8447616872443272, "compression/movement_sparsity/importance_threshold": -0.0011042253578923845, "compression/movement_sparsity/linear_layer_sparsity": 0.8284750935617889, "compression/movement_sparsity/model_sparsity": 0.8000144254104434, "compression_loss": 90.10848236083984, "distillation_loss": 6.7333855628967285, "epoch": 3.39, "learning_rate": 3.672865595942519e-05, "loss": 94.2227, "step": 4009, "task_loss": 3.7349231243133545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.845005517010345, "compression/movement_sparsity/importance_threshold": -0.0011024909728306921, "compression/movement_sparsity/linear_layer_sparsity": 0.8286993990791871, "compression/movement_sparsity/model_sparsity": 0.8002310253432745, "compression_loss": 90.1340103149414, "distillation_loss": 4.175456523895264, "epoch": 3.39, "learning_rate": 3.6723959800882876e-05, "loss": 94.2348, "step": 4010, "task_loss": 2.270418167114258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8452490913235142, "compression/movement_sparsity/importance_threshold": -0.001100758404830188, "compression/movement_sparsity/linear_layer_sparsity": 0.8288738973483704, "compression/movement_sparsity/model_sparsity": 0.8003995290600918, "compression_loss": 90.15956115722656, "distillation_loss": 3.3445703983306885, "epoch": 3.39, "learning_rate": 3.671926364234057e-05, "loss": 94.0406, "step": 4011, "task_loss": 3.178112745285034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8454924103177204, "compression/movement_sparsity/importance_threshold": -0.001099027652938533, "compression/movement_sparsity/linear_layer_sparsity": 0.8289236926724177, "compression/movement_sparsity/model_sparsity": 0.8004476137615698, "compression_loss": 90.18498992919922, "distillation_loss": 2.876408815383911, "epoch": 3.39, "learning_rate": 3.6714567483798256e-05, "loss": 93.6244, "step": 4012, "task_loss": 1.6756149530410767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8457354741268487, "compression/movement_sparsity/importance_threshold": -0.0010972987162033866, "compression/movement_sparsity/linear_layer_sparsity": 0.8290501246218608, "compression/movement_sparsity/model_sparsity": 0.8005697023845993, "compression_loss": 90.21044158935547, "distillation_loss": 4.2868123054504395, "epoch": 3.39, "learning_rate": 3.670987132525594e-05, "loss": 93.5094, "step": 4013, "task_loss": 2.8245856761932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8459782828847839, "compression/movement_sparsity/importance_threshold": -0.0010955715936724142, "compression/movement_sparsity/linear_layer_sparsity": 0.8291598031157754, "compression/movement_sparsity/model_sparsity": 0.8006756130848375, "compression_loss": 90.23587036132812, "distillation_loss": 3.783398151397705, "epoch": 3.39, "learning_rate": 3.670517516671363e-05, "loss": 94.3047, "step": 4014, "task_loss": 2.899223804473877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8462208367254116, "compression/movement_sparsity/importance_threshold": -0.0010938462843932753, "compression/movement_sparsity/linear_layer_sparsity": 0.8294184263876297, "compression/movement_sparsity/model_sparsity": 0.8009253518516853, "compression_loss": 90.26126098632812, "distillation_loss": 2.9872915744781494, "epoch": 3.39, "learning_rate": 3.6700479008171315e-05, "loss": 93.7451, "step": 4015, "task_loss": 2.771937847137451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8464631357826164, "compression/movement_sparsity/importance_threshold": -0.0010921227874136335, "compression/movement_sparsity/linear_layer_sparsity": 0.8295937355002122, "compression/movement_sparsity/model_sparsity": 0.8010946385569366, "compression_loss": 90.28659057617188, "distillation_loss": 4.888854503631592, "epoch": 3.39, "learning_rate": 3.669578284962901e-05, "loss": 94.9, "step": 4016, "task_loss": 3.802961587905884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8467051801902841, "compression/movement_sparsity/importance_threshold": -0.001090401101781149, "compression/movement_sparsity/linear_layer_sparsity": 0.8296950193801111, "compression/movement_sparsity/model_sparsity": 0.8011924430239755, "compression_loss": 90.31195831298828, "distillation_loss": 3.8661298751831055, "epoch": 3.4, "learning_rate": 3.669108669108669e-05, "loss": 95.0376, "step": 4017, "task_loss": 3.188614845275879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8469469700822992, "compression/movement_sparsity/importance_threshold": -0.0010886812265434847, "compression/movement_sparsity/linear_layer_sparsity": 0.8298678482658254, "compression/movement_sparsity/model_sparsity": 0.8013593347057816, "compression_loss": 90.3373031616211, "distillation_loss": 4.053671836853027, "epoch": 3.4, "learning_rate": 3.668639053254438e-05, "loss": 94.5851, "step": 4018, "task_loss": 2.5771842002868652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8471885055925473, "compression/movement_sparsity/importance_threshold": -0.0010869631607483025, "compression/movement_sparsity/linear_layer_sparsity": 0.8301400293162816, "compression/movement_sparsity/model_sparsity": 0.8016221654998278, "compression_loss": 90.36260223388672, "distillation_loss": 5.535055160522461, "epoch": 3.4, "learning_rate": 3.668169437400207e-05, "loss": 94.5927, "step": 4019, "task_loss": 3.1629106998443604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8474297868549132, "compression/movement_sparsity/importance_threshold": -0.0010852469034432644, "compression/movement_sparsity/linear_layer_sparsity": 0.8302409554711515, "compression/movement_sparsity/model_sparsity": 0.8017196245307927, "compression_loss": 90.38789367675781, "distillation_loss": 4.745058059692383, "epoch": 3.4, "learning_rate": 3.667699821545976e-05, "loss": 93.8954, "step": 4020, "task_loss": 2.6244285106658936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8476708140032823, "compression/movement_sparsity/importance_threshold": -0.0010835324536760315, "compression/movement_sparsity/linear_layer_sparsity": 0.8303973290055282, "compression/movement_sparsity/model_sparsity": 0.8018706261532024, "compression_loss": 90.41312408447266, "distillation_loss": 3.4554145336151123, "epoch": 3.4, "learning_rate": 3.6672302056917446e-05, "loss": 94.2755, "step": 4021, "task_loss": 2.1749677658081055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8479115871715397, "compression/movement_sparsity/importance_threshold": -0.001081819810494265, "compression/movement_sparsity/linear_layer_sparsity": 0.8306417744420634, "compression/movement_sparsity/model_sparsity": 0.8021066741369904, "compression_loss": 90.43827819824219, "distillation_loss": 2.875865936279297, "epoch": 3.4, "learning_rate": 3.666760589837513e-05, "loss": 94.3955, "step": 4022, "task_loss": 1.6644054651260376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8481521064935703, "compression/movement_sparsity/importance_threshold": -0.0010801089729456283, "compression/movement_sparsity/linear_layer_sparsity": 0.8307131048128612, "compression/movement_sparsity/model_sparsity": 0.8021755540901134, "compression_loss": 90.46340942382812, "distillation_loss": 3.833807945251465, "epoch": 3.4, "learning_rate": 3.666290973983282e-05, "loss": 94.3307, "step": 4023, "task_loss": 2.276472330093384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8483923721032597, "compression/movement_sparsity/importance_threshold": -0.001078399940077782, "compression/movement_sparsity/linear_layer_sparsity": 0.8309128584690971, "compression/movement_sparsity/model_sparsity": 0.8023684455937435, "compression_loss": 90.48860931396484, "distillation_loss": 4.827242851257324, "epoch": 3.4, "learning_rate": 3.6658213581290505e-05, "loss": 94.6964, "step": 4024, "task_loss": 3.228677272796631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8486323841344925, "compression/movement_sparsity/importance_threshold": -0.0010766927109383904, "compression/movement_sparsity/linear_layer_sparsity": 0.8310996505551129, "compression/movement_sparsity/model_sparsity": 0.802548820796965, "compression_loss": 90.513671875, "distillation_loss": 3.5236053466796875, "epoch": 3.4, "learning_rate": 3.66535174227482e-05, "loss": 94.8069, "step": 4025, "task_loss": 1.2692914009094238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8488721427211543, "compression/movement_sparsity/importance_threshold": -0.001074987284575112, "compression/movement_sparsity/linear_layer_sparsity": 0.8314052073507819, "compression/movement_sparsity/model_sparsity": 0.8028438807767001, "compression_loss": 90.5386734008789, "distillation_loss": 3.525179147720337, "epoch": 3.4, "learning_rate": 3.664882126420588e-05, "loss": 94.3613, "step": 4026, "task_loss": 1.7244737148284912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8491116479971299, "compression/movement_sparsity/importance_threshold": -0.0010732836600356108, "compression/movement_sparsity/linear_layer_sparsity": 0.8316672290104123, "compression/movement_sparsity/model_sparsity": 0.8030969011862493, "compression_loss": 90.5636978149414, "distillation_loss": 3.807065963745117, "epoch": 3.4, "learning_rate": 3.664412510566357e-05, "loss": 94.8346, "step": 4027, "task_loss": 3.730867385864258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8493509000963049, "compression/movement_sparsity/importance_threshold": -0.0010715818363675467, "compression/movement_sparsity/linear_layer_sparsity": 0.831812274585535, "compression/movement_sparsity/model_sparsity": 0.8032369639996542, "compression_loss": 90.5887451171875, "distillation_loss": 3.0038914680480957, "epoch": 3.4, "learning_rate": 3.663942894712126e-05, "loss": 94.5344, "step": 4028, "task_loss": 2.008230209350586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8495898991525638, "compression/movement_sparsity/importance_threshold": -0.0010698818126185844, "compression/movement_sparsity/linear_layer_sparsity": 0.831918602388344, "compression/movement_sparsity/model_sparsity": 0.803339639115334, "compression_loss": 90.61373138427734, "distillation_loss": 3.9373254776000977, "epoch": 3.41, "learning_rate": 3.6634732788578944e-05, "loss": 94.1266, "step": 4029, "task_loss": 2.4116995334625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8498286452997922, "compression/movement_sparsity/importance_threshold": -0.001068183587836384, "compression/movement_sparsity/linear_layer_sparsity": 0.8321230303182933, "compression/movement_sparsity/model_sparsity": 0.8035370443169957, "compression_loss": 90.63870239257812, "distillation_loss": 4.354536056518555, "epoch": 3.41, "learning_rate": 3.663003663003663e-05, "loss": 95.151, "step": 4030, "task_loss": 2.4135782718658447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.850067138671875, "compression/movement_sparsity/importance_threshold": -0.0010664871610686077, "compression/movement_sparsity/linear_layer_sparsity": 0.8322595501135562, "compression/movement_sparsity/model_sparsity": 0.8036688742373074, "compression_loss": 90.6636962890625, "distillation_loss": 5.005701065063477, "epoch": 3.41, "learning_rate": 3.6625340471494316e-05, "loss": 95.2993, "step": 4031, "task_loss": 2.5548036098480225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8503053794026975, "compression/movement_sparsity/importance_threshold": -0.0010647925313629174, "compression/movement_sparsity/linear_layer_sparsity": 0.8324308288574778, "compression/movement_sparsity/model_sparsity": 0.8038342690294602, "compression_loss": 90.68854522705078, "distillation_loss": 3.674271583557129, "epoch": 3.41, "learning_rate": 3.662064431295201e-05, "loss": 94.7386, "step": 4032, "task_loss": 3.1273107528686523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8505433676261449, "compression/movement_sparsity/importance_threshold": -0.001063099697766974, "compression/movement_sparsity/linear_layer_sparsity": 0.8324945039126532, "compression/movement_sparsity/model_sparsity": 0.803895756650603, "compression_loss": 90.71340942382812, "distillation_loss": 4.421474456787109, "epoch": 3.41, "learning_rate": 3.6615948154409696e-05, "loss": 95.5391, "step": 4033, "task_loss": 2.230781078338623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.850781103476102, "compression/movement_sparsity/importance_threshold": -0.0010614086593284415, "compression/movement_sparsity/linear_layer_sparsity": 0.8324886729946793, "compression/movement_sparsity/model_sparsity": 0.8038901260425995, "compression_loss": 90.73831176757812, "distillation_loss": 2.82174015045166, "epoch": 3.41, "learning_rate": 3.661125199586738e-05, "loss": 94.96, "step": 4034, "task_loss": 2.670048475265503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8510185870864545, "compression/movement_sparsity/importance_threshold": -0.0010597194150949781, "compression/movement_sparsity/linear_layer_sparsity": 0.8327216235336136, "compression/movement_sparsity/model_sparsity": 0.8041150740138816, "compression_loss": 90.76318359375, "distillation_loss": 3.1359169483184814, "epoch": 3.41, "learning_rate": 3.660655583732507e-05, "loss": 94.2796, "step": 4035, "task_loss": 1.8251010179519653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8512558185910869, "compression/movement_sparsity/importance_threshold": -0.0010580319641142512, "compression/movement_sparsity/linear_layer_sparsity": 0.8330140518207154, "compression/movement_sparsity/model_sparsity": 0.804397456489707, "compression_loss": 90.78795623779297, "distillation_loss": 4.5319414138793945, "epoch": 3.41, "learning_rate": 3.6601859678782755e-05, "loss": 94.9639, "step": 4036, "task_loss": 2.9761857986450195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8514927981238849, "compression/movement_sparsity/importance_threshold": -0.0010563463054339165, "compression/movement_sparsity/linear_layer_sparsity": 0.8332370337555061, "compression/movement_sparsity/model_sparsity": 0.8046127783090649, "compression_loss": 90.81268310546875, "distillation_loss": 3.3305389881134033, "epoch": 3.41, "learning_rate": 3.659716352024045e-05, "loss": 95.3293, "step": 4037, "task_loss": 1.5422327518463135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8517295258187332, "compression/movement_sparsity/importance_threshold": -0.0010546624381016414, "compression/movement_sparsity/linear_layer_sparsity": 0.833498089557558, "compression/movement_sparsity/model_sparsity": 0.8048648660412148, "compression_loss": 90.83747863769531, "distillation_loss": 3.6805334091186523, "epoch": 3.41, "learning_rate": 3.6592467361698134e-05, "loss": 94.7458, "step": 4038, "task_loss": 2.4588510990142822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8519660018095174, "compression/movement_sparsity/importance_threshold": -0.0010529803611650816, "compression/movement_sparsity/linear_layer_sparsity": 0.8336378408022503, "compression/movement_sparsity/model_sparsity": 0.8049998164007267, "compression_loss": 90.8621597290039, "distillation_loss": 2.8288984298706055, "epoch": 3.41, "learning_rate": 3.658777120315582e-05, "loss": 94.5078, "step": 4039, "task_loss": 1.869344711303711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8522022262301221, "compression/movement_sparsity/importance_threshold": -0.001051300073671906, "compression/movement_sparsity/linear_layer_sparsity": 0.8338038967607474, "compression/movement_sparsity/model_sparsity": 0.8051601678262015, "compression_loss": 90.88683319091797, "distillation_loss": 4.839293479919434, "epoch": 3.41, "learning_rate": 3.658307504461351e-05, "loss": 94.9466, "step": 4040, "task_loss": 2.02341890335083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.852438199214433, "compression/movement_sparsity/importance_threshold": -0.0010496215746697698, "compression/movement_sparsity/linear_layer_sparsity": 0.8339674486440409, "compression/movement_sparsity/model_sparsity": 0.8053181011991595, "compression_loss": 90.91146087646484, "distillation_loss": 3.3914551734924316, "epoch": 3.42, "learning_rate": 3.657837888607119e-05, "loss": 94.1889, "step": 4041, "task_loss": 2.0423905849456787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8526739208963346, "compression/movement_sparsity/importance_threshold": -0.001047944863206341, "compression/movement_sparsity/linear_layer_sparsity": 0.8341786495012073, "compression/movement_sparsity/model_sparsity": 0.8055220466571523, "compression_loss": 90.93605041503906, "distillation_loss": 4.015454292297363, "epoch": 3.42, "learning_rate": 3.6573682727528886e-05, "loss": 94.7399, "step": 4042, "task_loss": 1.8290212154388428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8529093914097126, "compression/movement_sparsity/importance_threshold": -0.001046269938329277, "compression/movement_sparsity/linear_layer_sparsity": 0.8343821711944162, "compression/movement_sparsity/model_sparsity": 0.8057185767540935, "compression_loss": 90.9605941772461, "distillation_loss": 3.8800179958343506, "epoch": 3.42, "learning_rate": 3.6568986568986566e-05, "loss": 94.8913, "step": 4043, "task_loss": 2.6456193923950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.853144610888452, "compression/movement_sparsity/importance_threshold": -0.0010445967990862408, "compression/movement_sparsity/linear_layer_sparsity": 0.8344668685571337, "compression/movement_sparsity/model_sparsity": 0.8058003645018422, "compression_loss": 90.985107421875, "distillation_loss": 4.83154296875, "epoch": 3.42, "learning_rate": 3.656429041044426e-05, "loss": 95.7137, "step": 4044, "task_loss": 2.3226590156555176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8533795794664376, "compression/movement_sparsity/importance_threshold": -0.0010429254445248962, "compression/movement_sparsity/linear_layer_sparsity": 0.8345505046689317, "compression/movement_sparsity/model_sparsity": 0.8058811274559051, "compression_loss": 91.00962829589844, "distillation_loss": 5.082546234130859, "epoch": 3.42, "learning_rate": 3.6559594251901945e-05, "loss": 95.1697, "step": 4045, "task_loss": 2.4870927333831787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8536142972775551, "compression/movement_sparsity/importance_threshold": -0.0010412558736929015, "compression/movement_sparsity/linear_layer_sparsity": 0.8346903036102945, "compression/movement_sparsity/model_sparsity": 0.8060161238735604, "compression_loss": 91.03411102294922, "distillation_loss": 4.890507698059082, "epoch": 3.42, "learning_rate": 3.655489809335964e-05, "loss": 95.551, "step": 4046, "task_loss": 3.945648193359375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8538487644556892, "compression/movement_sparsity/importance_threshold": -0.0010395880856379214, "compression/movement_sparsity/linear_layer_sparsity": 0.8349696510854999, "compression/movement_sparsity/model_sparsity": 0.806285874903619, "compression_loss": 91.05860137939453, "distillation_loss": 6.153520584106445, "epoch": 3.42, "learning_rate": 3.655020193481732e-05, "loss": 96.2323, "step": 4047, "task_loss": 3.110635995864868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8540829811347255, "compression/movement_sparsity/importance_threshold": -0.0010379220794076152, "compression/movement_sparsity/linear_layer_sparsity": 0.8351022359054431, "compression/movement_sparsity/model_sparsity": 0.8064139050271185, "compression_loss": 91.08307647705078, "distillation_loss": 5.616333484649658, "epoch": 3.42, "learning_rate": 3.6545505776275004e-05, "loss": 96.5384, "step": 4048, "task_loss": 3.54443621635437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8543169474485485, "compression/movement_sparsity/importance_threshold": -0.0010362578540496483, "compression/movement_sparsity/linear_layer_sparsity": 0.8352475318880861, "compression/movement_sparsity/model_sparsity": 0.806554209645775, "compression_loss": 91.10755920410156, "distillation_loss": 6.688626289367676, "epoch": 3.42, "learning_rate": 3.65408096177327e-05, "loss": 95.6493, "step": 4049, "task_loss": 3.5899951457977295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8545506635310439, "compression/movement_sparsity/importance_threshold": -0.0010345954086116793, "compression/movement_sparsity/linear_layer_sparsity": 0.8355925576786297, "compression/movement_sparsity/model_sparsity": 0.8068873827389901, "compression_loss": 91.13200378417969, "distillation_loss": 2.4765028953552246, "epoch": 3.42, "learning_rate": 3.6536113459190384e-05, "loss": 95.4405, "step": 4050, "task_loss": 1.9804224967956543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8547841295160965, "compression/movement_sparsity/importance_threshold": -0.001032934742141372, "compression/movement_sparsity/linear_layer_sparsity": 0.8357565626802934, "compression/movement_sparsity/model_sparsity": 0.8070457536643082, "compression_loss": 91.1564712524414, "distillation_loss": 4.067339897155762, "epoch": 3.42, "learning_rate": 3.653141730064808e-05, "loss": 95.539, "step": 4051, "task_loss": 3.5605287551879883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8550173455375915, "compression/movement_sparsity/importance_threshold": -0.0010312758536863888, "compression/movement_sparsity/linear_layer_sparsity": 0.8358422616730924, "compression/movement_sparsity/model_sparsity": 0.8071285086330635, "compression_loss": 91.18083953857422, "distillation_loss": 3.7759902477264404, "epoch": 3.42, "learning_rate": 3.6526721142105757e-05, "loss": 94.5431, "step": 4052, "task_loss": 2.178741455078125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8552503117294142, "compression/movement_sparsity/importance_threshold": -0.0010296187422943878, "compression/movement_sparsity/linear_layer_sparsity": 0.8359670123148988, "compression/movement_sparsity/model_sparsity": 0.807248973706546, "compression_loss": 91.2052001953125, "distillation_loss": 5.042751312255859, "epoch": 3.43, "learning_rate": 3.652202498356345e-05, "loss": 94.9314, "step": 4053, "task_loss": 2.666360855102539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8554830282254496, "compression/movement_sparsity/importance_threshold": -0.001027963407013035, "compression/movement_sparsity/linear_layer_sparsity": 0.836194036542518, "compression/movement_sparsity/model_sparsity": 0.8074681989535383, "compression_loss": 91.22953033447266, "distillation_loss": 5.691232681274414, "epoch": 3.43, "learning_rate": 3.6517328825021136e-05, "loss": 96.1958, "step": 4054, "task_loss": 2.8927228450775146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8557154951595831, "compression/movement_sparsity/importance_threshold": -0.0010263098468899891, "compression/movement_sparsity/linear_layer_sparsity": 0.8362516064238639, "compression/movement_sparsity/model_sparsity": 0.8075237911323543, "compression_loss": 91.25383758544922, "distillation_loss": 2.9263267517089844, "epoch": 3.43, "learning_rate": 3.651263266647882e-05, "loss": 95.2635, "step": 4055, "task_loss": 1.6256660223007202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8559477126656994, "compression/movement_sparsity/importance_threshold": -0.0010246580609729163, "compression/movement_sparsity/linear_layer_sparsity": 0.8364440147928361, "compression/movement_sparsity/model_sparsity": 0.8077095896819351, "compression_loss": 91.2780532836914, "distillation_loss": 4.448103904724121, "epoch": 3.43, "learning_rate": 3.650793650793651e-05, "loss": 96.2453, "step": 4056, "task_loss": 2.8868820667266846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.856179680877684, "compression/movement_sparsity/importance_threshold": -0.0010230080483094725, "compression/movement_sparsity/linear_layer_sparsity": 0.8366873512817813, "compression/movement_sparsity/model_sparsity": 0.8079445668138943, "compression_loss": 91.30227661132812, "distillation_loss": 3.45430064201355, "epoch": 3.43, "learning_rate": 3.6503240349394195e-05, "loss": 95.6481, "step": 4057, "task_loss": 1.5420575141906738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8564113999294217, "compression/movement_sparsity/importance_threshold": -0.0010213598079473257, "compression/movement_sparsity/linear_layer_sparsity": 0.8369964614794058, "compression/movement_sparsity/model_sparsity": 0.8082430581252961, "compression_loss": 91.32645416259766, "distillation_loss": 4.311443328857422, "epoch": 3.43, "learning_rate": 3.649854419085189e-05, "loss": 95.0396, "step": 4058, "task_loss": 2.2236149311065674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8566428699547981, "compression/movement_sparsity/importance_threshold": -0.0010197133389341327, "compression/movement_sparsity/linear_layer_sparsity": 0.8372492061366155, "compression/movement_sparsity/model_sparsity": 0.8084871202259972, "compression_loss": 91.35067749023438, "distillation_loss": 4.751638889312744, "epoch": 3.43, "learning_rate": 3.6493848032309575e-05, "loss": 95.2296, "step": 4059, "task_loss": 3.220860004425049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8568740910876979, "compression/movement_sparsity/importance_threshold": -0.001018068640317558, "compression/movement_sparsity/linear_layer_sparsity": 0.8374093477079652, "compression/movement_sparsity/model_sparsity": 0.8086417604417179, "compression_loss": 91.37483978271484, "distillation_loss": 4.449933052062988, "epoch": 3.43, "learning_rate": 3.648915187376726e-05, "loss": 95.1971, "step": 4060, "task_loss": 2.527759552001953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8571050634620064, "compression/movement_sparsity/importance_threshold": -0.0010164257111452638, "compression/movement_sparsity/linear_layer_sparsity": 0.837487093280951, "compression/movement_sparsity/model_sparsity": 0.8087168352150983, "compression_loss": 91.39896392822266, "distillation_loss": 6.462105751037598, "epoch": 3.43, "learning_rate": 3.648445571522495e-05, "loss": 96.3205, "step": 4061, "task_loss": 2.5712108612060547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.857335787211609, "compression/movement_sparsity/importance_threshold": -0.0010147845504649092, "compression/movement_sparsity/linear_layer_sparsity": 0.8377520363616522, "compression/movement_sparsity/model_sparsity": 0.8089726766859172, "compression_loss": 91.42308044433594, "distillation_loss": 4.397068023681641, "epoch": 3.43, "learning_rate": 3.6479759556682634e-05, "loss": 95.5119, "step": 4062, "task_loss": 2.201524257659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8575662624703904, "compression/movement_sparsity/importance_threshold": -0.0010131451573241598, "compression/movement_sparsity/linear_layer_sparsity": 0.8378609040121676, "compression/movement_sparsity/model_sparsity": 0.8090778043977214, "compression_loss": 91.44710540771484, "distillation_loss": 3.7737579345703125, "epoch": 3.43, "learning_rate": 3.647506339814033e-05, "loss": 95.8156, "step": 4063, "task_loss": 2.869382619857788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8577964893722361, "compression/movement_sparsity/importance_threshold": -0.001011507530770674, "compression/movement_sparsity/linear_layer_sparsity": 0.8380657254396489, "compression/movement_sparsity/model_sparsity": 0.8092755895790642, "compression_loss": 91.47113800048828, "distillation_loss": 4.1278605461120605, "epoch": 3.44, "learning_rate": 3.6470367239598006e-05, "loss": 95.1407, "step": 4064, "task_loss": 2.126291036605835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.858026468051031, "compression/movement_sparsity/importance_threshold": -0.0010098716698521155, "compression/movement_sparsity/linear_layer_sparsity": 0.8382616514380737, "compression/movement_sparsity/model_sparsity": 0.8094647849167043, "compression_loss": 91.49512481689453, "distillation_loss": 4.2701826095581055, "epoch": 3.44, "learning_rate": 3.64656710810557e-05, "loss": 95.6697, "step": 4065, "task_loss": 2.7335593700408936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8582561986406604, "compression/movement_sparsity/importance_threshold": -0.0010082375736161454, "compression/movement_sparsity/linear_layer_sparsity": 0.8385719898249646, "compression/movement_sparsity/model_sparsity": 0.809764462225293, "compression_loss": 91.51912689208984, "distillation_loss": 3.7335710525512695, "epoch": 3.44, "learning_rate": 3.6460974922513386e-05, "loss": 95.388, "step": 4066, "task_loss": 2.507824420928955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8584856812750092, "compression/movement_sparsity/importance_threshold": -0.0010066052411104275, "compression/movement_sparsity/linear_layer_sparsity": 0.8386870103459803, "compression/movement_sparsity/model_sparsity": 0.8098755314375671, "compression_loss": 91.54306030273438, "distillation_loss": 4.399372100830078, "epoch": 3.44, "learning_rate": 3.645627876397107e-05, "loss": 95.3577, "step": 4067, "task_loss": 3.607466220855713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.858714916087963, "compression/movement_sparsity/importance_threshold": -0.0010049746713826201, "compression/movement_sparsity/linear_layer_sparsity": 0.8388228623878556, "compression/movement_sparsity/model_sparsity": 0.8100067165438743, "compression_loss": 91.5670394897461, "distillation_loss": 4.414911270141602, "epoch": 3.44, "learning_rate": 3.6451582605428765e-05, "loss": 95.6985, "step": 4068, "task_loss": 1.9642137289047241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8589439032134064, "compression/movement_sparsity/importance_threshold": -0.0010033458634803889, "compression/movement_sparsity/linear_layer_sparsity": 0.8389752413260743, "compression/movement_sparsity/model_sparsity": 0.8101538607967927, "compression_loss": 91.5909194946289, "distillation_loss": 6.49188232421875, "epoch": 3.44, "learning_rate": 3.6446886446886445e-05, "loss": 95.748, "step": 4069, "task_loss": 2.8600897789001465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8591726427852251, "compression/movement_sparsity/importance_threshold": -0.0010017188164513913, "compression/movement_sparsity/linear_layer_sparsity": 0.8391254142932804, "compression/movement_sparsity/model_sparsity": 0.8102988748605892, "compression_loss": 91.61486053466797, "distillation_loss": 4.786157608032227, "epoch": 3.44, "learning_rate": 3.644219028834414e-05, "loss": 95.7115, "step": 4070, "task_loss": 2.440843343734741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8594011349373036, "compression/movement_sparsity/importance_threshold": -0.0010000935293432945, "compression/movement_sparsity/linear_layer_sparsity": 0.8394169124953067, "compression/movement_sparsity/model_sparsity": 0.8105803592026225, "compression_loss": 91.638671875, "distillation_loss": 7.0635576248168945, "epoch": 3.44, "learning_rate": 3.6437494129801824e-05, "loss": 96.6166, "step": 4071, "task_loss": 4.260164737701416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8596293798035276, "compression/movement_sparsity/importance_threshold": -0.0009984700012037563, "compression/movement_sparsity/linear_layer_sparsity": 0.8395763505407657, "compression/movement_sparsity/model_sparsity": 0.8107343200607313, "compression_loss": 91.66250610351562, "distillation_loss": 5.319990634918213, "epoch": 3.44, "learning_rate": 3.643279797125951e-05, "loss": 96.0363, "step": 4072, "task_loss": 3.5474965572357178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8598573775177818, "compression/movement_sparsity/importance_threshold": -0.000996848231080441, "compression/movement_sparsity/linear_layer_sparsity": 0.8397719188141616, "compression/movement_sparsity/model_sparsity": 0.8109231699622976, "compression_loss": 91.68633270263672, "distillation_loss": 4.426479339599609, "epoch": 3.44, "learning_rate": 3.64281018127172e-05, "loss": 95.9591, "step": 4073, "task_loss": 2.3052473068237305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8600851282139517, "compression/movement_sparsity/importance_threshold": -0.0009952282180210091, "compression/movement_sparsity/linear_layer_sparsity": 0.8399964389665773, "compression/movement_sparsity/model_sparsity": 0.811139977156773, "compression_loss": 91.7101058959961, "distillation_loss": 3.5452022552490234, "epoch": 3.44, "learning_rate": 3.642340565417488e-05, "loss": 95.7985, "step": 4074, "task_loss": 2.1247427463531494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8603126320259223, "compression/movement_sparsity/importance_threshold": -0.0009936099610731216, "compression/movement_sparsity/linear_layer_sparsity": 0.8400156130281357, "compression/movement_sparsity/model_sparsity": 0.8111584925303307, "compression_loss": 91.73387145996094, "distillation_loss": 3.6384449005126953, "epoch": 3.44, "learning_rate": 3.6418709495632576e-05, "loss": 95.8488, "step": 4075, "task_loss": 2.4205121994018555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8605398890875786, "compression/movement_sparsity/importance_threshold": -0.000991993459284443, "compression/movement_sparsity/linear_layer_sparsity": 0.8401985297596698, "compression/movement_sparsity/model_sparsity": 0.8113351255094189, "compression_loss": 91.75756072998047, "distillation_loss": 3.414308547973633, "epoch": 3.45, "learning_rate": 3.641401333709026e-05, "loss": 95.983, "step": 4076, "task_loss": 2.5628230571746826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8607668995328062, "compression/movement_sparsity/importance_threshold": -0.0009903787117026318, "compression/movement_sparsity/linear_layer_sparsity": 0.8403605553495059, "compression/movement_sparsity/model_sparsity": 0.8114915850217951, "compression_loss": 91.78131866455078, "distillation_loss": 5.4868693351745605, "epoch": 3.45, "learning_rate": 3.640931717854795e-05, "loss": 96.1526, "step": 4077, "task_loss": 3.149911642074585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8609936634954896, "compression/movement_sparsity/importance_threshold": -0.0009887657173753535, "compression/movement_sparsity/linear_layer_sparsity": 0.8404903499142223, "compression/movement_sparsity/model_sparsity": 0.8116169207439187, "compression_loss": 91.80497741699219, "distillation_loss": 3.1320533752441406, "epoch": 3.45, "learning_rate": 3.6404621020005635e-05, "loss": 95.6723, "step": 4078, "task_loss": 1.6541439294815063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8612201811095145, "compression/movement_sparsity/importance_threshold": -0.0009871544753502657, "compression/movement_sparsity/linear_layer_sparsity": 0.8405598558873717, "compression/movement_sparsity/model_sparsity": 0.8116840389730651, "compression_loss": 91.82862091064453, "distillation_loss": 4.653759479522705, "epoch": 3.45, "learning_rate": 3.639992486146332e-05, "loss": 96.0342, "step": 4079, "task_loss": 2.100778818130493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8614464525087656, "compression/movement_sparsity/importance_threshold": -0.0009855449846750347, "compression/movement_sparsity/linear_layer_sparsity": 0.8406076837237592, "compression/movement_sparsity/model_sparsity": 0.811730223776137, "compression_loss": 91.85224914550781, "distillation_loss": 3.5833799839019775, "epoch": 3.45, "learning_rate": 3.6395228702921015e-05, "loss": 96.3369, "step": 4080, "task_loss": 1.9244871139526367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8616724778271284, "compression/movement_sparsity/importance_threshold": -0.000983937244397319, "compression/movement_sparsity/linear_layer_sparsity": 0.840803609722184, "compression/movement_sparsity/model_sparsity": 0.8119194191137771, "compression_loss": 91.87583923339844, "distillation_loss": 4.046632289886475, "epoch": 3.45, "learning_rate": 3.63905325443787e-05, "loss": 96.2581, "step": 4081, "task_loss": 2.692913055419922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8618982571984878, "compression/movement_sparsity/importance_threshold": -0.0009823312535647832, "compression/movement_sparsity/linear_layer_sparsity": 0.8409930370492474, "compression/movement_sparsity/model_sparsity": 0.8121023390294092, "compression_loss": 91.89942169189453, "distillation_loss": 5.034050941467285, "epoch": 3.45, "learning_rate": 3.638583638583639e-05, "loss": 96.6114, "step": 4082, "task_loss": 4.105672359466553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.862123790756729, "compression/movement_sparsity/importance_threshold": -0.0009807270112250866, "compression/movement_sparsity/linear_layer_sparsity": 0.8411666410058579, "compression/movement_sparsity/model_sparsity": 0.8122699791560418, "compression_loss": 91.92291259765625, "distillation_loss": 3.899226188659668, "epoch": 3.45, "learning_rate": 3.6381140227294074e-05, "loss": 95.4977, "step": 4083, "task_loss": 1.6971179246902466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8623490786357372, "compression/movement_sparsity/importance_threshold": -0.000979124516425893, "compression/movement_sparsity/linear_layer_sparsity": 0.8412975206698291, "compression/movement_sparsity/model_sparsity": 0.8123963627009227, "compression_loss": 91.9464111328125, "distillation_loss": 3.9317498207092285, "epoch": 3.45, "learning_rate": 3.637644406875177e-05, "loss": 96.0209, "step": 4084, "task_loss": 0.9826387763023376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8625741209693975, "compression/movement_sparsity/importance_threshold": -0.0009775237682148625, "compression/movement_sparsity/linear_layer_sparsity": 0.8413611122558312, "compression/movement_sparsity/model_sparsity": 0.812457769720315, "compression_loss": 91.96989440917969, "distillation_loss": 4.879758834838867, "epoch": 3.45, "learning_rate": 3.637174791020945e-05, "loss": 95.6592, "step": 4085, "task_loss": 2.0443265438079834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8627989178915951, "compression/movement_sparsity/importance_threshold": -0.0009759247656396571, "compression/movement_sparsity/linear_layer_sparsity": 0.8414838596374747, "compression/movement_sparsity/model_sparsity": 0.8125763003517841, "compression_loss": 91.99337005615234, "distillation_loss": 5.362430572509766, "epoch": 3.45, "learning_rate": 3.636705175166713e-05, "loss": 96.5789, "step": 4086, "task_loss": 3.054652690887451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8630234695362149, "compression/movement_sparsity/importance_threshold": -0.0009743275077479423, "compression/movement_sparsity/linear_layer_sparsity": 0.841535658221685, "compression/movement_sparsity/model_sparsity": 0.8126263194952755, "compression_loss": 92.01683807373047, "distillation_loss": 5.310634613037109, "epoch": 3.45, "learning_rate": 3.6362355593124826e-05, "loss": 96.5341, "step": 4087, "task_loss": 2.70072865486145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8632477760371424, "compression/movement_sparsity/importance_threshold": -0.0009727319935873748, "compression/movement_sparsity/linear_layer_sparsity": 0.8417055299138255, "compression/movement_sparsity/model_sparsity": 0.8127903555722046, "compression_loss": 92.04022979736328, "distillation_loss": 5.636066436767578, "epoch": 3.46, "learning_rate": 3.635765943458251e-05, "loss": 96.7357, "step": 4088, "task_loss": 3.340787887573242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8634718375282625, "compression/movement_sparsity/importance_threshold": -0.0009711382222056192, "compression/movement_sparsity/linear_layer_sparsity": 0.8417972506112805, "compression/movement_sparsity/model_sparsity": 0.8128789253815362, "compression_loss": 92.06365966796875, "distillation_loss": 5.293954849243164, "epoch": 3.46, "learning_rate": 3.6352963276040205e-05, "loss": 96.6407, "step": 4089, "task_loss": 2.7891693115234375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8636956541434606, "compression/movement_sparsity/importance_threshold": -0.0009695461926503357, "compression/movement_sparsity/linear_layer_sparsity": 0.8419639981715005, "compression/movement_sparsity/model_sparsity": 0.813039944650087, "compression_loss": 92.08702850341797, "distillation_loss": 3.5348310470581055, "epoch": 3.46, "learning_rate": 3.6348267117497885e-05, "loss": 97.0752, "step": 4090, "task_loss": 2.86129093170166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8639192260166213, "compression/movement_sparsity/importance_threshold": -0.0009679559039691898, "compression/movement_sparsity/linear_layer_sparsity": 0.8421568000380047, "compression/movement_sparsity/model_sparsity": 0.813226123179349, "compression_loss": 92.11032104492188, "distillation_loss": 6.503182888031006, "epoch": 3.46, "learning_rate": 3.634357095895558e-05, "loss": 96.9079, "step": 4091, "task_loss": 3.3186540603637695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8641425532816303, "compression/movement_sparsity/importance_threshold": -0.0009663673552098399, "compression/movement_sparsity/linear_layer_sparsity": 0.8423137340082603, "compression/movement_sparsity/model_sparsity": 0.8133776659849409, "compression_loss": 92.13359832763672, "distillation_loss": 3.5440211296081543, "epoch": 3.46, "learning_rate": 3.6338874800413264e-05, "loss": 96.2059, "step": 4092, "task_loss": 1.6951913833618164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8643656360723726, "compression/movement_sparsity/importance_threshold": -0.0009647805454199472, "compression/movement_sparsity/linear_layer_sparsity": 0.842424163724736, "compression/movement_sparsity/model_sparsity": 0.8134843021009341, "compression_loss": 92.15690612792969, "distillation_loss": 4.817683219909668, "epoch": 3.46, "learning_rate": 3.633417864187095e-05, "loss": 96.2504, "step": 4093, "task_loss": 2.1215877532958984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.864588474522733, "compression/movement_sparsity/importance_threshold": -0.0009631954736471771, "compression/movement_sparsity/linear_layer_sparsity": 0.8425965871887506, "compression/movement_sparsity/model_sparsity": 0.8136508022885232, "compression_loss": 92.18009948730469, "distillation_loss": 3.611205577850342, "epoch": 3.46, "learning_rate": 3.632948248332864e-05, "loss": 96.0161, "step": 4094, "task_loss": 1.3515197038650513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8648110687665972, "compression/movement_sparsity/importance_threshold": -0.000961612138939188, "compression/movement_sparsity/linear_layer_sparsity": 0.8427170451302081, "compression/movement_sparsity/model_sparsity": 0.8137671221291196, "compression_loss": 92.20327758789062, "distillation_loss": 4.677479267120361, "epoch": 3.46, "learning_rate": 3.6324786324786323e-05, "loss": 96.7699, "step": 4095, "task_loss": 3.8243296146392822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8650334189378499, "compression/movement_sparsity/importance_threshold": -0.0009600305403436446, "compression/movement_sparsity/linear_layer_sparsity": 0.8428584180616989, "compression/movement_sparsity/model_sparsity": 0.8139036384654997, "compression_loss": 92.22644805908203, "distillation_loss": 2.8797099590301514, "epoch": 3.46, "learning_rate": 3.6320090166244017e-05, "loss": 95.9059, "step": 4096, "task_loss": 1.1865077018737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8652555251703765, "compression/movement_sparsity/importance_threshold": -0.0009584506769082062, "compression/movement_sparsity/linear_layer_sparsity": 0.8429710179766844, "compression/movement_sparsity/model_sparsity": 0.8140123702270076, "compression_loss": 92.24951934814453, "distillation_loss": 3.64860200881958, "epoch": 3.46, "learning_rate": 3.63153940077017e-05, "loss": 95.6641, "step": 4097, "task_loss": 2.041475296020508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8654773875980619, "compression/movement_sparsity/importance_threshold": -0.0009568725476805373, "compression/movement_sparsity/linear_layer_sparsity": 0.8430660297444069, "compression/movement_sparsity/model_sparsity": 0.8141041180482185, "compression_loss": 92.27265930175781, "distillation_loss": 3.8155243396759033, "epoch": 3.46, "learning_rate": 3.631069784915939e-05, "loss": 96.375, "step": 4098, "task_loss": 2.6670796871185303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8656990063547915, "compression/movement_sparsity/importance_threshold": -0.0009552961517082974, "compression/movement_sparsity/linear_layer_sparsity": 0.8432353052281656, "compression/movement_sparsity/model_sparsity": 0.8142675783983577, "compression_loss": 92.29566192626953, "distillation_loss": 6.5212578773498535, "epoch": 3.46, "learning_rate": 3.6306001690617076e-05, "loss": 97.2348, "step": 4099, "task_loss": 2.9742178916931152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8659203815744502, "compression/movement_sparsity/importance_threshold": -0.0009537214880391493, "compression/movement_sparsity/linear_layer_sparsity": 0.8434323163258454, "compression/movement_sparsity/model_sparsity": 0.8144578215587552, "compression_loss": 92.3187484741211, "distillation_loss": 4.306432723999023, "epoch": 3.47, "learning_rate": 3.630130553207476e-05, "loss": 96.6916, "step": 4100, "task_loss": 1.80338454246521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8661415133909232, "compression/movement_sparsity/importance_threshold": -0.0009521485557207558, "compression/movement_sparsity/linear_layer_sparsity": 0.8436621665811942, "compression/movement_sparsity/model_sparsity": 0.8146797757507307, "compression_loss": 92.34172058105469, "distillation_loss": 4.417397499084473, "epoch": 3.47, "learning_rate": 3.6296609373532455e-05, "loss": 96.554, "step": 4101, "task_loss": 2.1655983924865723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8663624019380959, "compression/movement_sparsity/importance_threshold": -0.0009505773538007762, "compression/movement_sparsity/linear_layer_sparsity": 0.8439131345374262, "compression/movement_sparsity/model_sparsity": 0.8149221221855985, "compression_loss": 92.36473846435547, "distillation_loss": 3.801133632659912, "epoch": 3.47, "learning_rate": 3.629191321499014e-05, "loss": 96.6179, "step": 4102, "task_loss": 1.6915870904922485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.866583047349853, "compression/movement_sparsity/importance_threshold": -0.0009490078813268752, "compression/movement_sparsity/linear_layer_sparsity": 0.8440609942161109, "compression/movement_sparsity/model_sparsity": 0.8150649024294507, "compression_loss": 92.38774108886719, "distillation_loss": 3.3195066452026367, "epoch": 3.47, "learning_rate": 3.628721705644783e-05, "loss": 96.7056, "step": 4103, "task_loss": 2.0150632858276367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8668034497600802, "compression/movement_sparsity/importance_threshold": -0.000947440137346712, "compression/movement_sparsity/linear_layer_sparsity": 0.8442133016093238, "compression/movement_sparsity/model_sparsity": 0.8152119775951544, "compression_loss": 92.41072082519531, "distillation_loss": 4.266412734985352, "epoch": 3.47, "learning_rate": 3.6282520897905514e-05, "loss": 96.5063, "step": 4104, "task_loss": 2.7569997310638428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.867023609302662, "compression/movement_sparsity/importance_threshold": -0.0009458741209079514, "compression/movement_sparsity/linear_layer_sparsity": 0.8444720441228544, "compression/movement_sparsity/model_sparsity": 0.8154618315073602, "compression_loss": 92.43367767333984, "distillation_loss": 3.6170902252197266, "epoch": 3.47, "learning_rate": 3.62778247393632e-05, "loss": 96.4994, "step": 4105, "task_loss": 2.2933666706085205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.867243526111484, "compression/movement_sparsity/importance_threshold": -0.0009443098310582525, "compression/movement_sparsity/linear_layer_sparsity": 0.8445960554662674, "compression/movement_sparsity/model_sparsity": 0.8155815826796234, "compression_loss": 92.45669555664062, "distillation_loss": 6.6453962326049805, "epoch": 3.47, "learning_rate": 3.6273128580820893e-05, "loss": 96.7941, "step": 4106, "task_loss": 2.9741716384887695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8674632003204312, "compression/movement_sparsity/importance_threshold": -0.0009427472668452792, "compression/movement_sparsity/linear_layer_sparsity": 0.8446745403376467, "compression/movement_sparsity/model_sparsity": 0.815657371354223, "compression_loss": 92.4795913696289, "distillation_loss": 4.811911106109619, "epoch": 3.47, "learning_rate": 3.626843242227857e-05, "loss": 97.106, "step": 4107, "task_loss": 3.128206968307495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8676826320633888, "compression/movement_sparsity/importance_threshold": -0.0009411864273166917, "compression/movement_sparsity/linear_layer_sparsity": 0.8449033889629142, "compression/movement_sparsity/model_sparsity": 0.8158783583251918, "compression_loss": 92.50251770019531, "distillation_loss": 5.0384416580200195, "epoch": 3.47, "learning_rate": 3.6263736263736266e-05, "loss": 97.2559, "step": 4108, "task_loss": 2.598440647125244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8679018214742417, "compression/movement_sparsity/importance_threshold": -0.0009396273115201536, "compression/movement_sparsity/linear_layer_sparsity": 0.845100101956403, "compression/movement_sparsity/model_sparsity": 0.8160683136221945, "compression_loss": 92.52540588378906, "distillation_loss": 4.948432445526123, "epoch": 3.47, "learning_rate": 3.625904010519395e-05, "loss": 96.6198, "step": 4109, "task_loss": 2.3219923973083496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8681207686868754, "compression/movement_sparsity/importance_threshold": -0.0009380699185033252, "compression/movement_sparsity/linear_layer_sparsity": 0.8451605455621491, "compression/movement_sparsity/model_sparsity": 0.816126680804137, "compression_loss": 92.54821014404297, "distillation_loss": 5.552611351013184, "epoch": 3.47, "learning_rate": 3.625434394665164e-05, "loss": 97.1367, "step": 4110, "task_loss": 3.1198949813842773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8683394738351747, "compression/movement_sparsity/importance_threshold": -0.0009365142473138684, "compression/movement_sparsity/linear_layer_sparsity": 0.8453831578477431, "compression/movement_sparsity/model_sparsity": 0.8163416456728853, "compression_loss": 92.57112121582031, "distillation_loss": 3.009075164794922, "epoch": 3.47, "learning_rate": 3.624964778810933e-05, "loss": 96.8289, "step": 4111, "task_loss": 1.8891996145248413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8685579370530251, "compression/movement_sparsity/importance_threshold": -0.0009349602969994462, "compression/movement_sparsity/linear_layer_sparsity": 0.8454245943302777, "compression/movement_sparsity/model_sparsity": 0.8163816586847713, "compression_loss": 92.59390258789062, "distillation_loss": 4.074714660644531, "epoch": 3.48, "learning_rate": 3.624495162956701e-05, "loss": 96.6889, "step": 4112, "task_loss": 2.1090447902679443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8687761584743113, "compression/movement_sparsity/importance_threshold": -0.0009334080666077204, "compression/movement_sparsity/linear_layer_sparsity": 0.8455805147462843, "compression/movement_sparsity/model_sparsity": 0.8165322227548206, "compression_loss": 92.61672973632812, "distillation_loss": 4.519030570983887, "epoch": 3.48, "learning_rate": 3.6240255471024705e-05, "loss": 96.8673, "step": 4113, "task_loss": 3.0081140995025635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8689941382329187, "compression/movement_sparsity/importance_threshold": -0.000931857555186353, "compression/movement_sparsity/linear_layer_sparsity": 0.8457818900893187, "compression/movement_sparsity/model_sparsity": 0.8167266802353189, "compression_loss": 92.63955688476562, "distillation_loss": 5.088354110717773, "epoch": 3.48, "learning_rate": 3.623555931248239e-05, "loss": 97.0754, "step": 4114, "task_loss": 3.010833501815796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8692118764627327, "compression/movement_sparsity/importance_threshold": -0.0009303087617830025, "compression/movement_sparsity/linear_layer_sparsity": 0.8458649061443996, "compression/movement_sparsity/model_sparsity": 0.8168068444335205, "compression_loss": 92.66228485107422, "distillation_loss": 4.94102668762207, "epoch": 3.48, "learning_rate": 3.6230863153940084e-05, "loss": 97.5181, "step": 4115, "task_loss": 2.0767087936401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8694293732976379, "compression/movement_sparsity/importance_threshold": -0.0009287616854453352, "compression/movement_sparsity/linear_layer_sparsity": 0.8460832018813094, "compression/movement_sparsity/model_sparsity": 0.8170176410403112, "compression_loss": 92.68502807617188, "distillation_loss": 4.644157409667969, "epoch": 3.48, "learning_rate": 3.6226166995397764e-05, "loss": 97.0226, "step": 4116, "task_loss": 2.9145474433898926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8696466288715199, "compression/movement_sparsity/importance_threshold": -0.0009272163252210105, "compression/movement_sparsity/linear_layer_sparsity": 0.8460278141226407, "compression/movement_sparsity/model_sparsity": 0.8169641560215455, "compression_loss": 92.70767974853516, "distillation_loss": 4.707746505737305, "epoch": 3.48, "learning_rate": 3.622147083685546e-05, "loss": 97.274, "step": 4117, "task_loss": 2.6685452461242676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8698636433182634, "compression/movement_sparsity/importance_threshold": -0.0009256726801576921, "compression/movement_sparsity/linear_layer_sparsity": 0.8461536737120373, "compression/movement_sparsity/model_sparsity": 0.8170856919468569, "compression_loss": 92.73036193847656, "distillation_loss": 5.938422203063965, "epoch": 3.48, "learning_rate": 3.621677467831314e-05, "loss": 97.2673, "step": 4118, "task_loss": 2.8293392658233643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8700804167717541, "compression/movement_sparsity/importance_threshold": -0.0009241307493030385, "compression/movement_sparsity/linear_layer_sparsity": 0.8462301195507508, "compression/movement_sparsity/model_sparsity": 0.8171595116358357, "compression_loss": 92.75297546386719, "distillation_loss": 5.390432357788086, "epoch": 3.48, "learning_rate": 3.621207851977083e-05, "loss": 97.5003, "step": 4119, "task_loss": 2.8107123374938965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8702969493658765, "compression/movement_sparsity/importance_threshold": -0.0009225905317047159, "compression/movement_sparsity/linear_layer_sparsity": 0.8463487053978894, "compression/movement_sparsity/model_sparsity": 0.8172740236943123, "compression_loss": 92.7755355834961, "distillation_loss": 3.7440619468688965, "epoch": 3.48, "learning_rate": 3.6207382361228516e-05, "loss": 96.2938, "step": 4120, "task_loss": 1.404693841934204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8705132412345163, "compression/movement_sparsity/importance_threshold": -0.000921052026410382, "compression/movement_sparsity/linear_layer_sparsity": 0.846450168140303, "compression/movement_sparsity/model_sparsity": 0.8173720008793881, "compression_loss": 92.79810333251953, "distillation_loss": 3.4303483963012695, "epoch": 3.48, "learning_rate": 3.62026862026862e-05, "loss": 96.3603, "step": 4121, "task_loss": 1.1495141983032227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8707292925115583, "compression/movement_sparsity/importance_threshold": -0.0009195152324677023, "compression/movement_sparsity/linear_layer_sparsity": 0.8466446393902763, "compression/movement_sparsity/model_sparsity": 0.8175597914436613, "compression_loss": 92.82062530517578, "distillation_loss": 3.5982935428619385, "epoch": 3.48, "learning_rate": 3.6197990044143895e-05, "loss": 97.1189, "step": 4122, "task_loss": 1.9289450645446777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8709451033308877, "compression/movement_sparsity/importance_threshold": -0.0009179801489243369, "compression/movement_sparsity/linear_layer_sparsity": 0.8468018476163875, "compression/movement_sparsity/model_sparsity": 0.8177115990835765, "compression_loss": 92.84310913085938, "distillation_loss": 5.343839168548584, "epoch": 3.48, "learning_rate": 3.619329388560158e-05, "loss": 97.3389, "step": 4123, "task_loss": 3.672727346420288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8711606738263898, "compression/movement_sparsity/importance_threshold": -0.0009164467748279462, "compression/movement_sparsity/linear_layer_sparsity": 0.8470111286825646, "compression/movement_sparsity/model_sparsity": 0.8179136907013065, "compression_loss": 92.86553192138672, "distillation_loss": 3.70902943611145, "epoch": 3.49, "learning_rate": 3.618859772705927e-05, "loss": 96.5523, "step": 4124, "task_loss": 3.395460605621338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8713760041319496, "compression/movement_sparsity/importance_threshold": -0.0009149151092261946, "compression/movement_sparsity/linear_layer_sparsity": 0.8472204455212444, "compression/movement_sparsity/model_sparsity": 0.8181158168626439, "compression_loss": 92.887939453125, "distillation_loss": 5.255092620849609, "epoch": 3.49, "learning_rate": 3.6183901568516954e-05, "loss": 97.7634, "step": 4125, "task_loss": 2.9617762565612793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8715910943814523, "compression/movement_sparsity/importance_threshold": -0.0009133851511667424, "compression/movement_sparsity/linear_layer_sparsity": 0.8473278822716436, "compression/movement_sparsity/model_sparsity": 0.8182195628301527, "compression_loss": 92.91034698486328, "distillation_loss": 5.273437976837158, "epoch": 3.49, "learning_rate": 3.617920540997464e-05, "loss": 97.3741, "step": 4126, "task_loss": 2.757915735244751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8718059447087829, "compression/movement_sparsity/importance_threshold": -0.0009118568996972533, "compression/movement_sparsity/linear_layer_sparsity": 0.8475211134081828, "compression/movement_sparsity/model_sparsity": 0.8184061558827033, "compression_loss": 92.93273162841797, "distillation_loss": 3.446136236190796, "epoch": 3.49, "learning_rate": 3.6174509251432334e-05, "loss": 96.9746, "step": 4127, "task_loss": 1.2802687883377075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8720205552478268, "compression/movement_sparsity/importance_threshold": -0.0009103303538653858, "compression/movement_sparsity/linear_layer_sparsity": 0.8475917641014252, "compression/movement_sparsity/model_sparsity": 0.8184743795072859, "compression_loss": 92.95513153076172, "distillation_loss": 3.637158155441284, "epoch": 3.49, "learning_rate": 3.616981309289002e-05, "loss": 97.055, "step": 4128, "task_loss": 2.3374853134155273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8722349261324689, "compression/movement_sparsity/importance_threshold": -0.0009088055127188053, "compression/movement_sparsity/linear_layer_sparsity": 0.8477353191555934, "compression/movement_sparsity/model_sparsity": 0.8186130030037163, "compression_loss": 92.97755432128906, "distillation_loss": 4.255396842956543, "epoch": 3.49, "learning_rate": 3.6165116934347706e-05, "loss": 97.5416, "step": 4129, "task_loss": 2.175326108932495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8724490574965945, "compression/movement_sparsity/importance_threshold": -0.0009072823753051712, "compression/movement_sparsity/linear_layer_sparsity": 0.8479858459176229, "compression/movement_sparsity/model_sparsity": 0.8188549234007596, "compression_loss": 92.99986267089844, "distillation_loss": 4.307700157165527, "epoch": 3.49, "learning_rate": 3.616042077580539e-05, "loss": 96.8646, "step": 4130, "task_loss": 2.5518991947174072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8726629494740886, "compression/movement_sparsity/importance_threshold": -0.0009057609406721472, "compression/movement_sparsity/linear_layer_sparsity": 0.8480760403216205, "compression/movement_sparsity/model_sparsity": 0.8189420193495096, "compression_loss": 93.0221176147461, "distillation_loss": 4.198293209075928, "epoch": 3.49, "learning_rate": 3.615572461726308e-05, "loss": 96.8107, "step": 4131, "task_loss": 3.5321638584136963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8728766021988363, "compression/movement_sparsity/importance_threshold": -0.0009042412078673944, "compression/movement_sparsity/linear_layer_sparsity": 0.8481708374543256, "compression/movement_sparsity/model_sparsity": 0.8190335599090761, "compression_loss": 93.04441833496094, "distillation_loss": 6.070283889770508, "epoch": 3.49, "learning_rate": 3.615102845872077e-05, "loss": 96.9655, "step": 4132, "task_loss": 3.322124481201172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8730900158047231, "compression/movement_sparsity/importance_threshold": -0.0009027231759385738, "compression/movement_sparsity/linear_layer_sparsity": 0.8483080130788087, "compression/movement_sparsity/model_sparsity": 0.8191660231288566, "compression_loss": 93.06671905517578, "distillation_loss": 4.9643425941467285, "epoch": 3.49, "learning_rate": 3.614633230017845e-05, "loss": 97.1395, "step": 4133, "task_loss": 2.6423285007476807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8733031904256338, "compression/movement_sparsity/importance_threshold": -0.0009012068439333484, "compression/movement_sparsity/linear_layer_sparsity": 0.8484499822186812, "compression/movement_sparsity/model_sparsity": 0.8193031151920264, "compression_loss": 93.08892822265625, "distillation_loss": 4.854728698730469, "epoch": 3.49, "learning_rate": 3.6141636141636145e-05, "loss": 97.8655, "step": 4134, "task_loss": 3.06130313873291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8735161261954536, "compression/movement_sparsity/importance_threshold": -0.0008996922108993784, "compression/movement_sparsity/linear_layer_sparsity": 0.8485581463433062, "compression/movement_sparsity/model_sparsity": 0.8194075635462187, "compression_loss": 93.11116790771484, "distillation_loss": 3.4893229007720947, "epoch": 3.5, "learning_rate": 3.613693998309383e-05, "loss": 96.8016, "step": 4135, "task_loss": 2.7159030437469482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8737288232480677, "compression/movement_sparsity/importance_threshold": -0.0008981792758843282, "compression/movement_sparsity/linear_layer_sparsity": 0.8487003539665314, "compression/movement_sparsity/model_sparsity": 0.8195448859001043, "compression_loss": 93.1332778930664, "distillation_loss": 3.818812131881714, "epoch": 3.5, "learning_rate": 3.613224382455152e-05, "loss": 97.3946, "step": 4136, "task_loss": 2.5395710468292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8739412817173613, "compression/movement_sparsity/importance_threshold": -0.0008966680379358565, "compression/movement_sparsity/linear_layer_sparsity": 0.8489390161817633, "compression/movement_sparsity/model_sparsity": 0.8197753493340321, "compression_loss": 93.15544128417969, "distillation_loss": 5.152992248535156, "epoch": 3.5, "learning_rate": 3.6127547666009204e-05, "loss": 96.9947, "step": 4137, "task_loss": 2.8254737854003906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8741535017372193, "compression/movement_sparsity/importance_threshold": -0.0008951584961016295, "compression/movement_sparsity/linear_layer_sparsity": 0.8491074569737874, "compression/movement_sparsity/model_sparsity": 0.8199380036666658, "compression_loss": 93.17748260498047, "distillation_loss": 3.939605951309204, "epoch": 3.5, "learning_rate": 3.612285150746689e-05, "loss": 97.3522, "step": 4138, "task_loss": 2.7267792224884033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8743654834415271, "compression/movement_sparsity/importance_threshold": -0.0008936506494293048, "compression/movement_sparsity/linear_layer_sparsity": 0.849268254374357, "compression/movement_sparsity/model_sparsity": 0.8200932771818551, "compression_loss": 93.19955444335938, "distillation_loss": 4.806919097900391, "epoch": 3.5, "learning_rate": 3.611815534892458e-05, "loss": 97.7808, "step": 4139, "task_loss": 3.256376028060913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8745772269641696, "compression/movement_sparsity/importance_threshold": -0.000892144496966547, "compression/movement_sparsity/linear_layer_sparsity": 0.8494212175967899, "compression/movement_sparsity/model_sparsity": 0.8202409856470275, "compression_loss": 93.22150421142578, "distillation_loss": 5.588584899902344, "epoch": 3.5, "learning_rate": 3.611345919038227e-05, "loss": 98.3658, "step": 4140, "task_loss": 2.482548475265503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8747887324390324, "compression/movement_sparsity/importance_threshold": -0.0008906400377610146, "compression/movement_sparsity/linear_layer_sparsity": 0.8496233322382177, "compression/movement_sparsity/model_sparsity": 0.820436157028745, "compression_loss": 93.24352264404297, "distillation_loss": 2.821225643157959, "epoch": 3.5, "learning_rate": 3.6108763031839956e-05, "loss": 96.7005, "step": 4141, "task_loss": 2.9290082454681396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.875, "compression/movement_sparsity/importance_threshold": -0.000889137270860374, "compression/movement_sparsity/linear_layer_sparsity": 0.8498070717373188, "compression/movement_sparsity/model_sparsity": 0.820613584510803, "compression_loss": 93.2655029296875, "distillation_loss": 4.243393421173096, "epoch": 3.5, "learning_rate": 3.610406687329764e-05, "loss": 97.3016, "step": 4142, "task_loss": 2.5672523975372314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.875211029780958, "compression/movement_sparsity/importance_threshold": -0.0008876361953122835, "compression/movement_sparsity/linear_layer_sparsity": 0.8499428999308589, "compression/movement_sparsity/model_sparsity": 0.8207447465880386, "compression_loss": 93.2874755859375, "distillation_loss": 4.407837867736816, "epoch": 3.5, "learning_rate": 3.609937071475533e-05, "loss": 97.4781, "step": 4143, "task_loss": 1.7705891132354736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8754218219157913, "compression/movement_sparsity/importance_threshold": -0.0008861368101644078, "compression/movement_sparsity/linear_layer_sparsity": 0.8500098541321343, "compression/movement_sparsity/model_sparsity": 0.820809400706525, "compression_loss": 93.30936431884766, "distillation_loss": 6.193343639373779, "epoch": 3.5, "learning_rate": 3.609467455621302e-05, "loss": 98.2486, "step": 4144, "task_loss": 2.645648717880249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8756323765383853, "compression/movement_sparsity/importance_threshold": -0.0008846391144644054, "compression/movement_sparsity/linear_layer_sparsity": 0.850210335162596, "compression/movement_sparsity/model_sparsity": 0.8210029945968387, "compression_loss": 93.33120727539062, "distillation_loss": 4.342700481414795, "epoch": 3.5, "learning_rate": 3.608997839767071e-05, "loss": 96.9508, "step": 4145, "task_loss": 1.630873680114746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8758426937826249, "compression/movement_sparsity/importance_threshold": -0.0008831431072599408, "compression/movement_sparsity/linear_layer_sparsity": 0.850455376807513, "compression/movement_sparsity/model_sparsity": 0.8212396183074164, "compression_loss": 93.35308837890625, "distillation_loss": 4.706840515136719, "epoch": 3.5, "learning_rate": 3.6085282239128394e-05, "loss": 97.8865, "step": 4146, "task_loss": 3.2090816497802734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8760527737823953, "compression/movement_sparsity/importance_threshold": -0.0008816487875986751, "compression/movement_sparsity/linear_layer_sparsity": 0.8506670427072173, "compression/movement_sparsity/model_sparsity": 0.8214440128323053, "compression_loss": 93.37484741210938, "distillation_loss": 4.1456403732299805, "epoch": 3.51, "learning_rate": 3.608058608058608e-05, "loss": 97.5254, "step": 4147, "task_loss": 2.459594488143921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8762626166715816, "compression/movement_sparsity/importance_threshold": -0.0008801561545282712, "compression/movement_sparsity/linear_layer_sparsity": 0.8507690778096774, "compression/movement_sparsity/model_sparsity": 0.8215425427150992, "compression_loss": 93.39672088623047, "distillation_loss": 4.545586109161377, "epoch": 3.51, "learning_rate": 3.6075889922043774e-05, "loss": 97.3667, "step": 4148, "task_loss": 2.83333158493042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8764722225840692, "compression/movement_sparsity/importance_threshold": -0.0008786652070963876, "compression/movement_sparsity/linear_layer_sparsity": 0.8509277646325754, "compression/movement_sparsity/model_sparsity": 0.821695778157453, "compression_loss": 93.41851043701172, "distillation_loss": 4.039731502532959, "epoch": 3.51, "learning_rate": 3.607119376350146e-05, "loss": 97.8409, "step": 4149, "task_loss": 1.8350461721420288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8766815916537428, "compression/movement_sparsity/importance_threshold": -0.0008771759443506905, "compression/movement_sparsity/linear_layer_sparsity": 0.8509753539856101, "compression/movement_sparsity/model_sparsity": 0.821741732669809, "compression_loss": 93.44020080566406, "distillation_loss": 4.166861534118652, "epoch": 3.51, "learning_rate": 3.606649760495914e-05, "loss": 97.7055, "step": 4150, "task_loss": 2.660228967666626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.876890724014488, "compression/movement_sparsity/importance_threshold": -0.0008756883653388376, "compression/movement_sparsity/linear_layer_sparsity": 0.8511564820919989, "compression/movement_sparsity/model_sparsity": 0.821916638468528, "compression_loss": 93.46195220947266, "distillation_loss": 3.6192338466644287, "epoch": 3.51, "learning_rate": 3.606180144641683e-05, "loss": 97.1304, "step": 4151, "task_loss": 1.7001402378082275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8770996198001896, "compression/movement_sparsity/importance_threshold": -0.0008742024691084944, "compression/movement_sparsity/linear_layer_sparsity": 0.8512301734479885, "compression/movement_sparsity/model_sparsity": 0.8219877982997383, "compression_loss": 93.4836654663086, "distillation_loss": 4.483870506286621, "epoch": 3.51, "learning_rate": 3.605710528787452e-05, "loss": 97.9334, "step": 4152, "task_loss": 2.2263777256011963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.877308279144733, "compression/movement_sparsity/importance_threshold": -0.0008727182547073192, "compression/movement_sparsity/linear_layer_sparsity": 0.8514518318001716, "compression/movement_sparsity/model_sparsity": 0.8222018420056231, "compression_loss": 93.50535583496094, "distillation_loss": 5.511635780334473, "epoch": 3.51, "learning_rate": 3.605240912933221e-05, "loss": 98.1931, "step": 4153, "task_loss": 3.0382919311523438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8775167021820031, "compression/movement_sparsity/importance_threshold": -0.0008712357211829776, "compression/movement_sparsity/linear_layer_sparsity": 0.8515539384476375, "compression/movement_sparsity/model_sparsity": 0.8223004409756317, "compression_loss": 93.52705383300781, "distillation_loss": 3.65960431098938, "epoch": 3.51, "learning_rate": 3.604771297078989e-05, "loss": 97.7576, "step": 4154, "task_loss": 2.07135272026062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8777248890458851, "compression/movement_sparsity/importance_threshold": -0.000869754867583129, "compression/movement_sparsity/linear_layer_sparsity": 0.8517327771138401, "compression/movement_sparsity/model_sparsity": 0.8224731359834782, "compression_loss": 93.54869842529297, "distillation_loss": 3.8200478553771973, "epoch": 3.51, "learning_rate": 3.6043016812247585e-05, "loss": 97.999, "step": 4155, "task_loss": 2.116802930831909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8779328398702643, "compression/movement_sparsity/importance_threshold": -0.000868275692955436, "compression/movement_sparsity/linear_layer_sparsity": 0.8518563591872182, "compression/movement_sparsity/model_sparsity": 0.8225924726324528, "compression_loss": 93.57035064697266, "distillation_loss": 4.384474754333496, "epoch": 3.51, "learning_rate": 3.603832065370527e-05, "loss": 97.9388, "step": 4156, "task_loss": 1.933719277381897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8781405547890256, "compression/movement_sparsity/importance_threshold": -0.00086679819634756, "compression/movement_sparsity/linear_layer_sparsity": 0.851948282595523, "compression/movement_sparsity/model_sparsity": 0.8226812381888929, "compression_loss": 93.5919418334961, "distillation_loss": 5.045559406280518, "epoch": 3.51, "learning_rate": 3.603362449516296e-05, "loss": 98.3578, "step": 4157, "task_loss": 2.3077800273895264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8783480339360544, "compression/movement_sparsity/importance_threshold": -0.0008653223768071636, "compression/movement_sparsity/linear_layer_sparsity": 0.8520206503689052, "compression/movement_sparsity/model_sparsity": 0.82275111990663, "compression_loss": 93.61351013183594, "distillation_loss": 4.841887474060059, "epoch": 3.51, "learning_rate": 3.602892833662065e-05, "loss": 99.1189, "step": 4158, "task_loss": 3.4726030826568604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8785552774452356, "compression/movement_sparsity/importance_threshold": -0.0008638482333819079, "compression/movement_sparsity/linear_layer_sparsity": 0.8521799810968556, "compression/movement_sparsity/model_sparsity": 0.8229049771339166, "compression_loss": 93.63512420654297, "distillation_loss": 4.383157730102539, "epoch": 3.52, "learning_rate": 3.602423217807833e-05, "loss": 97.7297, "step": 4159, "task_loss": 2.2356972694396973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8787622854504545, "compression/movement_sparsity/importance_threshold": -0.0008623757651194551, "compression/movement_sparsity/linear_layer_sparsity": 0.8524888170386243, "compression/movement_sparsity/model_sparsity": 0.8232032036109953, "compression_loss": 93.65663146972656, "distillation_loss": 5.066877365112305, "epoch": 3.52, "learning_rate": 3.6019536019536024e-05, "loss": 98.196, "step": 4160, "task_loss": 1.663059949874878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8789690580855962, "compression/movement_sparsity/importance_threshold": -0.0008609049710674678, "compression/movement_sparsity/linear_layer_sparsity": 0.8526389661574951, "compression/movement_sparsity/model_sparsity": 0.8233481946457202, "compression_loss": 93.67815399169922, "distillation_loss": 6.071979522705078, "epoch": 3.52, "learning_rate": 3.601483986099371e-05, "loss": 97.9847, "step": 4161, "task_loss": 3.6604411602020264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8791755954845459, "compression/movement_sparsity/importance_threshold": -0.0008594358502736054, "compression/movement_sparsity/linear_layer_sparsity": 0.852845600058457, "compression/movement_sparsity/model_sparsity": 0.8235477300365037, "compression_loss": 93.69969940185547, "distillation_loss": 5.219454765319824, "epoch": 3.52, "learning_rate": 3.6010143702451396e-05, "loss": 98.5404, "step": 4162, "task_loss": 3.4790167808532715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8793818977811886, "compression/movement_sparsity/importance_threshold": -0.0008579684017855325, "compression/movement_sparsity/linear_layer_sparsity": 0.8529931497087833, "compression/movement_sparsity/model_sparsity": 0.8236902109024253, "compression_loss": 93.72112274169922, "distillation_loss": 4.4483842849731445, "epoch": 3.52, "learning_rate": 3.600544754390908e-05, "loss": 98.6993, "step": 4163, "task_loss": 1.5288991928100586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8795879651094095, "compression/movement_sparsity/importance_threshold": -0.0008565026246509103, "compression/movement_sparsity/linear_layer_sparsity": 0.8530496583392095, "compression/movement_sparsity/model_sparsity": 0.8237447782875557, "compression_loss": 93.74264526367188, "distillation_loss": 4.86331844329834, "epoch": 3.52, "learning_rate": 3.600075138536677e-05, "loss": 98.2776, "step": 4164, "task_loss": 2.646252393722534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8797937976030938, "compression/movement_sparsity/importance_threshold": -0.0008550385179173989, "compression/movement_sparsity/linear_layer_sparsity": 0.8532474564319532, "compression/movement_sparsity/model_sparsity": 0.8239357814073155, "compression_loss": 93.76416015625, "distillation_loss": 5.015115737915039, "epoch": 3.52, "learning_rate": 3.599605522682446e-05, "loss": 98.0622, "step": 4165, "task_loss": 3.022916555404663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8799993953961265, "compression/movement_sparsity/importance_threshold": -0.0008535760806326637, "compression/movement_sparsity/linear_layer_sparsity": 0.8533634308863797, "compression/movement_sparsity/model_sparsity": 0.8240477717824533, "compression_loss": 93.78551483154297, "distillation_loss": 2.9383997917175293, "epoch": 3.52, "learning_rate": 3.599135906828215e-05, "loss": 98.303, "step": 4166, "task_loss": 1.3454824686050415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.880204758622393, "compression/movement_sparsity/importance_threshold": -0.0008521153118443616, "compression/movement_sparsity/linear_layer_sparsity": 0.8535310369866693, "compression/movement_sparsity/model_sparsity": 0.8242096200975814, "compression_loss": 93.80697631835938, "distillation_loss": 4.379079818725586, "epoch": 3.52, "learning_rate": 3.5986662909739835e-05, "loss": 98.2664, "step": 4167, "task_loss": 3.5277321338653564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8804098874157782, "compression/movement_sparsity/importance_threshold": -0.0008506562106001588, "compression/movement_sparsity/linear_layer_sparsity": 0.8536341452642167, "compression/movement_sparsity/model_sparsity": 0.8243091862885967, "compression_loss": 93.82833862304688, "distillation_loss": 5.551721572875977, "epoch": 3.52, "learning_rate": 3.598196675119752e-05, "loss": 98.3138, "step": 4168, "task_loss": 2.9399077892303467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8806147819101673, "compression/movement_sparsity/importance_threshold": -0.0008491987759477156, "compression/movement_sparsity/linear_layer_sparsity": 0.8537976732991749, "compression/movement_sparsity/model_sparsity": 0.8244670966324831, "compression_loss": 93.84967803955078, "distillation_loss": 5.319038391113281, "epoch": 3.52, "learning_rate": 3.597727059265521e-05, "loss": 99.4557, "step": 4169, "task_loss": 2.4408295154571533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8808194422394454, "compression/movement_sparsity/importance_threshold": -0.000847743006934693, "compression/movement_sparsity/linear_layer_sparsity": 0.8539610105474509, "compression/movement_sparsity/model_sparsity": 0.8246248227437967, "compression_loss": 93.87105560302734, "distillation_loss": 4.726320266723633, "epoch": 3.52, "learning_rate": 3.59725744341129e-05, "loss": 97.897, "step": 4170, "task_loss": 2.761012554168701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8810238685374978, "compression/movement_sparsity/importance_threshold": -0.000846288902608754, "compression/movement_sparsity/linear_layer_sparsity": 0.8541652953873885, "compression/movement_sparsity/model_sparsity": 0.8248220897710288, "compression_loss": 93.89234924316406, "distillation_loss": 4.850901126861572, "epoch": 3.53, "learning_rate": 3.596787827557058e-05, "loss": 98.0167, "step": 4171, "task_loss": 3.065613031387329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8812280609382095, "compression/movement_sparsity/importance_threshold": -0.0008448364620175603, "compression/movement_sparsity/linear_layer_sparsity": 0.8543345947194825, "compression/movement_sparsity/model_sparsity": 0.8249855731502396, "compression_loss": 93.91360473632812, "distillation_loss": 5.020024299621582, "epoch": 3.53, "learning_rate": 3.596318211702827e-05, "loss": 98.2392, "step": 4172, "task_loss": 2.783991575241089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8814320195754657, "compression/movement_sparsity/importance_threshold": -0.0008433856842087733, "compression/movement_sparsity/linear_layer_sparsity": 0.854380490840713, "compression/movement_sparsity/model_sparsity": 0.8250298925985128, "compression_loss": 93.93487548828125, "distillation_loss": 3.9645235538482666, "epoch": 3.53, "learning_rate": 3.595848595848596e-05, "loss": 98.3656, "step": 4173, "task_loss": 2.6184308528900146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8816357445831514, "compression/movement_sparsity/importance_threshold": -0.0008419365682300556, "compression/movement_sparsity/linear_layer_sparsity": 0.8544766711768639, "compression/movement_sparsity/model_sparsity": 0.8251227688442315, "compression_loss": 93.95606994628906, "distillation_loss": 4.163701057434082, "epoch": 3.53, "learning_rate": 3.5953789799943646e-05, "loss": 97.2612, "step": 4174, "task_loss": 3.1657025814056396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8818392360951519, "compression/movement_sparsity/importance_threshold": -0.0008404891131290693, "compression/movement_sparsity/linear_layer_sparsity": 0.854513016039818, "compression/movement_sparsity/model_sparsity": 0.8251578651493333, "compression_loss": 93.97726440429688, "distillation_loss": 4.585553169250488, "epoch": 3.53, "learning_rate": 3.594909364140134e-05, "loss": 98.7109, "step": 4175, "task_loss": 1.8406462669372559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8820424942453525, "compression/movement_sparsity/importance_threshold": -0.0008390433179534728, "compression/movement_sparsity/linear_layer_sparsity": 0.8548167126653358, "compression/movement_sparsity/model_sparsity": 0.8254511288614845, "compression_loss": 93.99839782714844, "distillation_loss": 3.96030855178833, "epoch": 3.53, "learning_rate": 3.594439748285902e-05, "loss": 98.5717, "step": 4176, "task_loss": 1.5925532579421997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8822455191676379, "compression/movement_sparsity/importance_threshold": -0.0008375991817509334, "compression/movement_sparsity/linear_layer_sparsity": 0.8549225754256069, "compression/movement_sparsity/model_sparsity": 0.8255533549102684, "compression_loss": 94.0195083618164, "distillation_loss": 5.346658706665039, "epoch": 3.53, "learning_rate": 3.593970132431671e-05, "loss": 98.7758, "step": 4177, "task_loss": 3.633540391921997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8824483109958936, "compression/movement_sparsity/importance_threshold": -0.0008361567035691086, "compression/movement_sparsity/linear_layer_sparsity": 0.8550418051777979, "compression/movement_sparsity/model_sparsity": 0.825668488753678, "compression_loss": 94.04059600830078, "distillation_loss": 4.2058916091918945, "epoch": 3.53, "learning_rate": 3.59350051657744e-05, "loss": 97.5607, "step": 4178, "task_loss": 1.83018159866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8826508698640047, "compression/movement_sparsity/importance_threshold": -0.0008347158824556621, "compression/movement_sparsity/linear_layer_sparsity": 0.855164373696927, "compression/movement_sparsity/model_sparsity": 0.8257868466671101, "compression_loss": 94.06165313720703, "distillation_loss": 3.687270164489746, "epoch": 3.53, "learning_rate": 3.593030900723209e-05, "loss": 98.0476, "step": 4179, "task_loss": 2.2670795917510986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8828531959058562, "compression/movement_sparsity/importance_threshold": -0.0008332767174582551, "compression/movement_sparsity/linear_layer_sparsity": 0.8553533002089496, "compression/movement_sparsity/model_sparsity": 0.8259692829722388, "compression_loss": 94.08261108398438, "distillation_loss": 4.338622093200684, "epoch": 3.53, "learning_rate": 3.592561284868977e-05, "loss": 98.3008, "step": 4180, "task_loss": 1.9232250452041626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8830552892553332, "compression/movement_sparsity/importance_threshold": -0.0008318392076245512, "compression/movement_sparsity/linear_layer_sparsity": 0.8555225041477025, "compression/movement_sparsity/model_sparsity": 0.8261326742351633, "compression_loss": 94.10363006591797, "distillation_loss": 3.7264130115509033, "epoch": 3.53, "learning_rate": 3.592091669014746e-05, "loss": 99.0527, "step": 4181, "task_loss": 2.0932459831237793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.883257150046321, "compression/movement_sparsity/importance_threshold": -0.0008304033520022099, "compression/movement_sparsity/linear_layer_sparsity": 0.8556999596104594, "compression/movement_sparsity/model_sparsity": 0.8263040335568578, "compression_loss": 94.12457275390625, "distillation_loss": 3.1537461280822754, "epoch": 3.53, "learning_rate": 3.591622053160515e-05, "loss": 98.7209, "step": 4182, "task_loss": 2.294995069503784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8834587784127047, "compression/movement_sparsity/importance_threshold": -0.0008289691496388947, "compression/movement_sparsity/linear_layer_sparsity": 0.8557959014632576, "compression/movement_sparsity/model_sparsity": 0.8263966795118606, "compression_loss": 94.14555358886719, "distillation_loss": 4.674450874328613, "epoch": 3.54, "learning_rate": 3.5911524373062836e-05, "loss": 98.7814, "step": 4183, "task_loss": 2.3267574310302734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8836601744883694, "compression/movement_sparsity/importance_threshold": -0.0008275365995822669, "compression/movement_sparsity/linear_layer_sparsity": 0.8559075474448321, "compression/movement_sparsity/model_sparsity": 0.8265044901105049, "compression_loss": 94.16645050048828, "distillation_loss": 5.312593460083008, "epoch": 3.54, "learning_rate": 3.590682821452052e-05, "loss": 98.3821, "step": 4184, "task_loss": 2.1306962966918945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8838613384072003, "compression/movement_sparsity/importance_threshold": -0.0008261057008799883, "compression/movement_sparsity/linear_layer_sparsity": 0.8561290865553388, "compression/movement_sparsity/model_sparsity": 0.8267184186710317, "compression_loss": 94.18731689453125, "distillation_loss": 4.366358757019043, "epoch": 3.54, "learning_rate": 3.590213205597821e-05, "loss": 97.7095, "step": 4185, "task_loss": 1.5608158111572266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8840622703030825, "compression/movement_sparsity/importance_threshold": -0.0008246764525797201, "compression/movement_sparsity/linear_layer_sparsity": 0.8561487495077703, "compression/movement_sparsity/model_sparsity": 0.8267374061405569, "compression_loss": 94.20821380615234, "distillation_loss": 3.129755973815918, "epoch": 3.54, "learning_rate": 3.58974358974359e-05, "loss": 98.0949, "step": 4186, "task_loss": 2.297044277191162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8842629703099012, "compression/movement_sparsity/importance_threshold": -0.0008232488537291242, "compression/movement_sparsity/linear_layer_sparsity": 0.8562086803743082, "compression/movement_sparsity/model_sparsity": 0.8267952781974602, "compression_loss": 94.22900390625, "distillation_loss": 4.295925617218018, "epoch": 3.54, "learning_rate": 3.589273973889359e-05, "loss": 98.7729, "step": 4187, "task_loss": 2.8201539516448975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8844634385615413, "compression/movement_sparsity/importance_threshold": -0.0008218229033758644, "compression/movement_sparsity/linear_layer_sparsity": 0.8563247979187463, "compression/movement_sparsity/model_sparsity": 0.8269074067470275, "compression_loss": 94.24979400634766, "distillation_loss": 4.6681365966796875, "epoch": 3.54, "learning_rate": 3.5888043580351275e-05, "loss": 98.8061, "step": 4188, "task_loss": 3.8346502780914307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8846636751918884, "compression/movement_sparsity/importance_threshold": -0.0008203986005676, "compression/movement_sparsity/linear_layer_sparsity": 0.8564678163853707, "compression/movement_sparsity/model_sparsity": 0.8270455120893472, "compression_loss": 94.27056884765625, "distillation_loss": 4.929719924926758, "epoch": 3.54, "learning_rate": 3.588334742180896e-05, "loss": 98.5744, "step": 4189, "task_loss": 2.8584518432617188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8848636803348271, "compression/movement_sparsity/importance_threshold": -0.0008189759443519955, "compression/movement_sparsity/linear_layer_sparsity": 0.8566324175954162, "compression/movement_sparsity/model_sparsity": 0.827204458741455, "compression_loss": 94.29133605957031, "distillation_loss": 6.388613700866699, "epoch": 3.54, "learning_rate": 3.587865126326665e-05, "loss": 98.9449, "step": 4190, "task_loss": 2.6299591064453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.885063454124243, "compression/movement_sparsity/importance_threshold": -0.0008175549337767095, "compression/movement_sparsity/linear_layer_sparsity": 0.8567354424037901, "compression/movement_sparsity/model_sparsity": 0.8273039443307199, "compression_loss": 94.31206512451172, "distillation_loss": 3.983421564102173, "epoch": 3.54, "learning_rate": 3.587395510472434e-05, "loss": 97.9241, "step": 4191, "task_loss": 2.255056381225586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.885262996694021, "compression/movement_sparsity/importance_threshold": -0.0008161355678894073, "compression/movement_sparsity/linear_layer_sparsity": 0.856878806671276, "compression/movement_sparsity/model_sparsity": 0.8274423835945777, "compression_loss": 94.3327407836914, "distillation_loss": 3.862727403640747, "epoch": 3.54, "learning_rate": 3.586925894618203e-05, "loss": 98.7535, "step": 4192, "task_loss": 2.551567792892456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8854623081780463, "compression/movement_sparsity/importance_threshold": -0.0008147178457377485, "compression/movement_sparsity/linear_layer_sparsity": 0.8570903771776391, "compression/movement_sparsity/model_sparsity": 0.8276466860031801, "compression_loss": 94.35344696044922, "distillation_loss": 3.483952760696411, "epoch": 3.54, "learning_rate": 3.5864562787639713e-05, "loss": 98.6265, "step": 4193, "task_loss": 2.505422353744507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8856613887102041, "compression/movement_sparsity/importance_threshold": -0.0008133017663693948, "compression/movement_sparsity/linear_layer_sparsity": 0.8572622163574396, "compression/movement_sparsity/model_sparsity": 0.8278126219785152, "compression_loss": 94.37409973144531, "distillation_loss": 4.788325309753418, "epoch": 3.54, "learning_rate": 3.58598666290974e-05, "loss": 98.2297, "step": 4194, "task_loss": 2.677701234817505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8858602384243793, "compression/movement_sparsity/importance_threshold": -0.0008118873288320092, "compression/movement_sparsity/linear_layer_sparsity": 0.8573822092563593, "compression/movement_sparsity/model_sparsity": 0.8279284927522157, "compression_loss": 94.39476776123047, "distillation_loss": 4.858417510986328, "epoch": 3.55, "learning_rate": 3.5855170470555086e-05, "loss": 98.7062, "step": 4195, "task_loss": 2.7905311584472656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8860588574544573, "compression/movement_sparsity/importance_threshold": -0.0008104745321732528, "compression/movement_sparsity/linear_layer_sparsity": 0.8574359753282295, "compression/movement_sparsity/model_sparsity": 0.8279804117941133, "compression_loss": 94.41535949707031, "distillation_loss": 4.537137031555176, "epoch": 3.55, "learning_rate": 3.585047431201278e-05, "loss": 98.2921, "step": 4196, "task_loss": 1.8492285013198853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8862572459343231, "compression/movement_sparsity/importance_threshold": -0.0008090633754407892, "compression/movement_sparsity/linear_layer_sparsity": 0.8576468065361991, "compression/movement_sparsity/model_sparsity": 0.8281840003014966, "compression_loss": 94.43598175048828, "distillation_loss": 3.53853702545166, "epoch": 3.55, "learning_rate": 3.584577815347046e-05, "loss": 98.5277, "step": 4197, "task_loss": 2.0268750190734863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.886455403997862, "compression/movement_sparsity/importance_threshold": -0.000807653857682277, "compression/movement_sparsity/linear_layer_sparsity": 0.8577770303709503, "compression/movement_sparsity/model_sparsity": 0.8283097505469088, "compression_loss": 94.45655059814453, "distillation_loss": 3.894145965576172, "epoch": 3.55, "learning_rate": 3.584108199492815e-05, "loss": 98.4578, "step": 4198, "task_loss": 1.9914817810058594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.886653331778959, "compression/movement_sparsity/importance_threshold": -0.0008062459779453815, "compression/movement_sparsity/linear_layer_sparsity": 0.8579327480761071, "compression/movement_sparsity/model_sparsity": 0.8284601188698497, "compression_loss": 94.47702026367188, "distillation_loss": 4.716765880584717, "epoch": 3.55, "learning_rate": 3.583638583638584e-05, "loss": 98.2043, "step": 4199, "task_loss": 2.6712300777435303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8868510294114992, "compression/movement_sparsity/importance_threshold": -0.0008048397352777631, "compression/movement_sparsity/linear_layer_sparsity": 0.8581118133014949, "compression/movement_sparsity/model_sparsity": 0.8286330326538763, "compression_loss": 94.49752044677734, "distillation_loss": 4.191227436065674, "epoch": 3.55, "learning_rate": 3.5831689677843525e-05, "loss": 98.7462, "step": 4200, "task_loss": 2.4574506282806396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8870484970293679, "compression/movement_sparsity/importance_threshold": -0.0008034351287270819, "compression/movement_sparsity/linear_layer_sparsity": 0.8581723642247497, "compression/movement_sparsity/model_sparsity": 0.828691503466641, "compression_loss": 94.5180435180664, "distillation_loss": 3.4395041465759277, "epoch": 3.55, "learning_rate": 3.582699351930121e-05, "loss": 98.5805, "step": 4201, "task_loss": 1.3294755220413208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8872457347664502, "compression/movement_sparsity/importance_threshold": -0.0008020321573410016, "compression/movement_sparsity/linear_layer_sparsity": 0.858327771901548, "compression/movement_sparsity/model_sparsity": 0.8288415724116512, "compression_loss": 94.53842163085938, "distillation_loss": 3.872023105621338, "epoch": 3.55, "learning_rate": 3.58222973607589e-05, "loss": 98.6479, "step": 4202, "task_loss": 1.6334009170532227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8874427427566312, "compression/movement_sparsity/importance_threshold": -0.0008006308201671843, "compression/movement_sparsity/linear_layer_sparsity": 0.8584195760681764, "compression/movement_sparsity/model_sparsity": 0.8289302228227334, "compression_loss": 94.55889129638672, "distillation_loss": 3.421872615814209, "epoch": 3.55, "learning_rate": 3.581760120221659e-05, "loss": 98.9347, "step": 4203, "task_loss": 1.9807744026184082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8876395211337959, "compression/movement_sparsity/importance_threshold": -0.0007992311162532918, "compression/movement_sparsity/linear_layer_sparsity": 0.8584326091834025, "compression/movement_sparsity/model_sparsity": 0.8289428082103568, "compression_loss": 94.5792236328125, "distillation_loss": 3.831367015838623, "epoch": 3.55, "learning_rate": 3.581290504367428e-05, "loss": 99.2217, "step": 4204, "task_loss": 1.5426476001739502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8878360700318296, "compression/movement_sparsity/importance_threshold": -0.0007978330446469853, "compression/movement_sparsity/linear_layer_sparsity": 0.8585766173559408, "compression/movement_sparsity/model_sparsity": 0.8290818692591475, "compression_loss": 94.59965515136719, "distillation_loss": 4.2861504554748535, "epoch": 3.55, "learning_rate": 3.580820888513196e-05, "loss": 99.2577, "step": 4205, "task_loss": 2.2252068519592285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8880323895846174, "compression/movement_sparsity/importance_threshold": -0.0007964366043959267, "compression/movement_sparsity/linear_layer_sparsity": 0.8586760529898563, "compression/movement_sparsity/model_sparsity": 0.8291778889731382, "compression_loss": 94.62002563476562, "distillation_loss": 5.069375038146973, "epoch": 3.56, "learning_rate": 3.580351272658965e-05, "loss": 99.1623, "step": 4206, "task_loss": 2.578991651535034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8882284799260447, "compression/movement_sparsity/importance_threshold": -0.0007950417945477772, "compression/movement_sparsity/linear_layer_sparsity": 0.8587866734930142, "compression/movement_sparsity/model_sparsity": 0.8292847093217042, "compression_loss": 94.64039611816406, "distillation_loss": 3.330601692199707, "epoch": 3.56, "learning_rate": 3.5798816568047336e-05, "loss": 98.594, "step": 4207, "task_loss": 2.588888168334961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8884243411899961, "compression/movement_sparsity/importance_threshold": -0.0007936486141502004, "compression/movement_sparsity/linear_layer_sparsity": 0.8588902825856022, "compression/movement_sparsity/model_sparsity": 0.829384759123223, "compression_loss": 94.66069030761719, "distillation_loss": 4.069555759429932, "epoch": 3.56, "learning_rate": 3.579412040950503e-05, "loss": 98.7941, "step": 4208, "task_loss": 2.126737594604492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8886199735103572, "compression/movement_sparsity/importance_threshold": -0.0007922570622508575, "compression/movement_sparsity/linear_layer_sparsity": 0.8589953583508094, "compression/movement_sparsity/model_sparsity": 0.8294862252126444, "compression_loss": 94.6810073852539, "distillation_loss": 3.434044599533081, "epoch": 3.56, "learning_rate": 3.5789424250962715e-05, "loss": 98.9831, "step": 4209, "task_loss": 2.4867312908172607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.888815377021013, "compression/movement_sparsity/importance_threshold": -0.0007908671378974094, "compression/movement_sparsity/linear_layer_sparsity": 0.8591402966084233, "compression/movement_sparsity/model_sparsity": 0.8296261843952271, "compression_loss": 94.70136260986328, "distillation_loss": 5.186169147491455, "epoch": 3.56, "learning_rate": 3.57847280924204e-05, "loss": 99.6794, "step": 4210, "task_loss": 2.215132713317871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8890105518558485, "compression/movement_sparsity/importance_threshold": -0.0007894788401375183, "compression/movement_sparsity/linear_layer_sparsity": 0.859149180113312, "compression/movement_sparsity/model_sparsity": 0.829634762724394, "compression_loss": 94.72156524658203, "distillation_loss": 4.256136894226074, "epoch": 3.56, "learning_rate": 3.578003193387809e-05, "loss": 99.6258, "step": 4211, "task_loss": 2.6034576892852783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.889205498148749, "compression/movement_sparsity/importance_threshold": -0.0007880921680188478, "compression/movement_sparsity/linear_layer_sparsity": 0.8593201130563721, "compression/movement_sparsity/model_sparsity": 0.8297998235950087, "compression_loss": 94.7418212890625, "distillation_loss": 4.428223609924316, "epoch": 3.56, "learning_rate": 3.577533577533578e-05, "loss": 99.3173, "step": 4212, "task_loss": 2.221327543258667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8894002160335998, "compression/movement_sparsity/importance_threshold": -0.0007867071205890563, "compression/movement_sparsity/linear_layer_sparsity": 0.8595148227896982, "compression/movement_sparsity/model_sparsity": 0.8299878444499977, "compression_loss": 94.7620849609375, "distillation_loss": 4.726651191711426, "epoch": 3.56, "learning_rate": 3.577063961679347e-05, "loss": 98.5791, "step": 4213, "task_loss": 1.927122950553894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8895947056442857, "compression/movement_sparsity/importance_threshold": -0.0007853236968958085, "compression/movement_sparsity/linear_layer_sparsity": 0.8596951042801847, "compression/movement_sparsity/model_sparsity": 0.8301619327166755, "compression_loss": 94.78226470947266, "distillation_loss": 3.7944693565368652, "epoch": 3.56, "learning_rate": 3.576594345825115e-05, "loss": 98.5151, "step": 4214, "task_loss": 2.00724458694458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8897889671146918, "compression/movement_sparsity/importance_threshold": -0.0007839418959867672, "compression/movement_sparsity/linear_layer_sparsity": 0.8597623923581539, "compression/movement_sparsity/model_sparsity": 0.830226909242164, "compression_loss": 94.80245208740234, "distillation_loss": 5.057916641235352, "epoch": 3.56, "learning_rate": 3.576124729970884e-05, "loss": 99.6454, "step": 4215, "task_loss": 2.8407576084136963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8899830005787037, "compression/movement_sparsity/importance_threshold": -0.00078256171690959, "compression/movement_sparsity/linear_layer_sparsity": 0.859935519348059, "compression/movement_sparsity/model_sparsity": 0.830394088787365, "compression_loss": 94.82257843017578, "distillation_loss": 4.312788963317871, "epoch": 3.56, "learning_rate": 3.5756551141166526e-05, "loss": 98.5746, "step": 4216, "task_loss": 1.9678943157196045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8901768061702062, "compression/movement_sparsity/importance_threshold": -0.0007811831587119416, "compression/movement_sparsity/linear_layer_sparsity": 0.8599229751237061, "compression/movement_sparsity/model_sparsity": 0.8303819754957091, "compression_loss": 94.84273529052734, "distillation_loss": 4.064949035644531, "epoch": 3.56, "learning_rate": 3.575185498262422e-05, "loss": 98.4776, "step": 4217, "task_loss": 2.020219326019287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8903703840230844, "compression/movement_sparsity/importance_threshold": -0.0007798062204414846, "compression/movement_sparsity/linear_layer_sparsity": 0.8600422048758971, "compression/movement_sparsity/model_sparsity": 0.8304971093391187, "compression_loss": 94.86283111572266, "distillation_loss": 4.893569469451904, "epoch": 3.57, "learning_rate": 3.57471588240819e-05, "loss": 99.128, "step": 4218, "task_loss": 2.4209694862365723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8905637342712236, "compression/movement_sparsity/importance_threshold": -0.0007784309011458794, "compression/movement_sparsity/linear_layer_sparsity": 0.8600572650996212, "compression/movement_sparsity/model_sparsity": 0.8305116521978272, "compression_loss": 94.88290405273438, "distillation_loss": 4.105799198150635, "epoch": 3.57, "learning_rate": 3.574246266553959e-05, "loss": 98.8072, "step": 4219, "task_loss": 2.2616801261901855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.890756857048509, "compression/movement_sparsity/importance_threshold": -0.000777057199872787, "compression/movement_sparsity/linear_layer_sparsity": 0.8601510248297419, "compression/movement_sparsity/model_sparsity": 0.8306021909927797, "compression_loss": 94.90296936035156, "distillation_loss": 4.2403035163879395, "epoch": 3.57, "learning_rate": 3.573776650699728e-05, "loss": 99.2131, "step": 4220, "task_loss": 3.604950428009033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8909497524888255, "compression/movement_sparsity/importance_threshold": -0.000775685115669872, "compression/movement_sparsity/linear_layer_sparsity": 0.8602643401947856, "compression/movement_sparsity/model_sparsity": 0.8307116136264352, "compression_loss": 94.92298126220703, "distillation_loss": 4.149240970611572, "epoch": 3.57, "learning_rate": 3.5733070348454965e-05, "loss": 99.0399, "step": 4221, "task_loss": 1.2424564361572266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8911424207260584, "compression/movement_sparsity/importance_threshold": -0.0007743146475847939, "compression/movement_sparsity/linear_layer_sparsity": 0.8604152524603851, "compression/movement_sparsity/model_sparsity": 0.830857341591451, "compression_loss": 94.94306182861328, "distillation_loss": 5.266828536987305, "epoch": 3.57, "learning_rate": 3.572837418991266e-05, "loss": 99.8664, "step": 4222, "task_loss": 3.709794759750366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8913348618940928, "compression/movement_sparsity/importance_threshold": -0.0007729457946652153, "compression/movement_sparsity/linear_layer_sparsity": 0.8605465733185588, "compression/movement_sparsity/model_sparsity": 0.8309841511741562, "compression_loss": 94.9630355834961, "distillation_loss": 3.9843170642852783, "epoch": 3.57, "learning_rate": 3.572367803137034e-05, "loss": 98.6347, "step": 4223, "task_loss": 1.9333255290985107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8915270761268139, "compression/movement_sparsity/importance_threshold": -0.0007715785559587983, "compression/movement_sparsity/linear_layer_sparsity": 0.8606184164285648, "compression/movement_sparsity/model_sparsity": 0.8310535262523183, "compression_loss": 94.98302459716797, "distillation_loss": 4.177804946899414, "epoch": 3.57, "learning_rate": 3.571898187282803e-05, "loss": 99.6266, "step": 4224, "task_loss": 3.7377443313598633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8917190635581068, "compression/movement_sparsity/importance_threshold": -0.000770212930513204, "compression/movement_sparsity/linear_layer_sparsity": 0.8606636805689106, "compression/movement_sparsity/model_sparsity": 0.8310972354301944, "compression_loss": 95.0029525756836, "distillation_loss": 6.713780879974365, "epoch": 3.57, "learning_rate": 3.571428571428572e-05, "loss": 100.1659, "step": 4225, "task_loss": 3.0087881088256836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8919108243218565, "compression/movement_sparsity/importance_threshold": -0.000768848917376096, "compression/movement_sparsity/linear_layer_sparsity": 0.8607536484137233, "compression/movement_sparsity/model_sparsity": 0.8311841126027643, "compression_loss": 95.02294158935547, "distillation_loss": 3.4965853691101074, "epoch": 3.57, "learning_rate": 3.57095895557434e-05, "loss": 99.6297, "step": 4226, "task_loss": 1.815160870552063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8921023585519484, "compression/movement_sparsity/importance_threshold": -0.0007674865155951338, "compression/movement_sparsity/linear_layer_sparsity": 0.8609648134983867, "compression/movement_sparsity/model_sparsity": 0.8313880235171497, "compression_loss": 95.04283905029297, "distillation_loss": 3.2229981422424316, "epoch": 3.57, "learning_rate": 3.570489339720109e-05, "loss": 98.8347, "step": 4227, "task_loss": 2.4864983558654785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8922936663822675, "compression/movement_sparsity/importance_threshold": -0.000766125724217981, "compression/movement_sparsity/linear_layer_sparsity": 0.8610182337693953, "compression/movement_sparsity/model_sparsity": 0.8314396086375093, "compression_loss": 95.06271362304688, "distillation_loss": 3.58321213722229, "epoch": 3.57, "learning_rate": 3.5700197238658776e-05, "loss": 99.0894, "step": 4228, "task_loss": 2.1962385177612305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.892484747946699, "compression/movement_sparsity/importance_threshold": -0.0007647665422922988, "compression/movement_sparsity/linear_layer_sparsity": 0.861187771584842, "compression/movement_sparsity/model_sparsity": 0.831603322307436, "compression_loss": 95.0825424194336, "distillation_loss": 4.64139461517334, "epoch": 3.57, "learning_rate": 3.569550108011647e-05, "loss": 100.5131, "step": 4229, "task_loss": 3.0139107704162598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8926756033791279, "compression/movement_sparsity/importance_threshold": -0.0007634089688657491, "compression/movement_sparsity/linear_layer_sparsity": 0.8612467604421367, "compression/movement_sparsity/model_sparsity": 0.8316602847160116, "compression_loss": 95.10235595703125, "distillation_loss": 5.814520359039307, "epoch": 3.58, "learning_rate": 3.5690804921574155e-05, "loss": 100.3708, "step": 4230, "task_loss": 2.817002296447754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8928662328134394, "compression/movement_sparsity/importance_threshold": -0.0007620530029859938, "compression/movement_sparsity/linear_layer_sparsity": 0.8612659106553598, "compression/movement_sparsity/model_sparsity": 0.8316787770604976, "compression_loss": 95.12210845947266, "distillation_loss": 3.7117342948913574, "epoch": 3.58, "learning_rate": 3.568610876303184e-05, "loss": 98.8351, "step": 4231, "task_loss": 1.847719430923462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8930566363835187, "compression/movement_sparsity/importance_threshold": -0.0007606986437006941, "compression/movement_sparsity/linear_layer_sparsity": 0.8614081182785852, "compression/movement_sparsity/model_sparsity": 0.8318160994143834, "compression_loss": 95.14189147949219, "distillation_loss": 4.158713340759277, "epoch": 3.58, "learning_rate": 3.568141260448953e-05, "loss": 99.1017, "step": 4232, "task_loss": 2.554323434829712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.893246814223251, "compression/movement_sparsity/importance_threshold": -0.0007593458900575127, "compression/movement_sparsity/linear_layer_sparsity": 0.8614973706733396, "compression/movement_sparsity/model_sparsity": 0.8319022857148055, "compression_loss": 95.1616439819336, "distillation_loss": 3.056636095046997, "epoch": 3.58, "learning_rate": 3.5676716445947214e-05, "loss": 98.3473, "step": 4233, "task_loss": 1.0564689636230469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8934367664665213, "compression/movement_sparsity/importance_threshold": -0.0007579947411041118, "compression/movement_sparsity/linear_layer_sparsity": 0.8616582753914179, "compression/movement_sparsity/model_sparsity": 0.8320576628608171, "compression_loss": 95.18132781982422, "distillation_loss": 3.6260249614715576, "epoch": 3.58, "learning_rate": 3.567202028740491e-05, "loss": 99.1328, "step": 4234, "task_loss": 2.8066842555999756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8936264932472149, "compression/movement_sparsity/importance_threshold": -0.0007566451958881514, "compression/movement_sparsity/linear_layer_sparsity": 0.8617722108131787, "compression/movement_sparsity/model_sparsity": 0.8321676842503339, "compression_loss": 95.20101165771484, "distillation_loss": 4.211832046508789, "epoch": 3.58, "learning_rate": 3.5667324128862594e-05, "loss": 98.9401, "step": 4235, "task_loss": 2.206688165664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8938159946992167, "compression/movement_sparsity/importance_threshold": -0.0007552972534572953, "compression/movement_sparsity/linear_layer_sparsity": 0.8619217160269971, "compression/movement_sparsity/model_sparsity": 0.8323120535001258, "compression_loss": 95.22076416015625, "distillation_loss": 4.004524230957031, "epoch": 3.58, "learning_rate": 3.566262797032028e-05, "loss": 98.7387, "step": 4236, "task_loss": 2.603062152862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8940052709564119, "compression/movement_sparsity/importance_threshold": -0.0007539509128592055, "compression/movement_sparsity/linear_layer_sparsity": 0.8619809433676444, "compression/movement_sparsity/model_sparsity": 0.8323692461994173, "compression_loss": 95.24041748046875, "distillation_loss": 4.706895351409912, "epoch": 3.58, "learning_rate": 3.5657931811777967e-05, "loss": 100.3652, "step": 4237, "task_loss": 2.424591302871704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.894194322152686, "compression/movement_sparsity/importance_threshold": -0.0007526061731415413, "compression/movement_sparsity/linear_layer_sparsity": 0.8621312475006945, "compression/movement_sparsity/model_sparsity": 0.8325143869231075, "compression_loss": 95.26010131835938, "distillation_loss": 3.975720167160034, "epoch": 3.58, "learning_rate": 3.565323565323565e-05, "loss": 100.1427, "step": 4238, "task_loss": 1.855724573135376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8943831484219236, "compression/movement_sparsity/importance_threshold": -0.0007512630333519682, "compression/movement_sparsity/linear_layer_sparsity": 0.8622994855818689, "compression/movement_sparsity/model_sparsity": 0.8326768455086326, "compression_loss": 95.27971649169922, "distillation_loss": 5.753711700439453, "epoch": 3.58, "learning_rate": 3.5648539494693346e-05, "loss": 100.1962, "step": 4239, "task_loss": 3.3861563205718994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8945717498980102, "compression/movement_sparsity/importance_threshold": -0.0007499214925381446, "compression/movement_sparsity/linear_layer_sparsity": 0.8623597503251005, "compression/movement_sparsity/model_sparsity": 0.8327350399725383, "compression_loss": 95.29933166503906, "distillation_loss": 6.4863057136535645, "epoch": 3.58, "learning_rate": 3.5643843336151026e-05, "loss": 100.2454, "step": 4240, "task_loss": 3.9295787811279297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8947601267148309, "compression/movement_sparsity/importance_threshold": -0.0007485815497477351, "compression/movement_sparsity/linear_layer_sparsity": 0.8624312714825804, "compression/movement_sparsity/model_sparsity": 0.8328041041582339, "compression_loss": 95.31898498535156, "distillation_loss": 5.76236629486084, "epoch": 3.58, "learning_rate": 3.563914717760872e-05, "loss": 99.7186, "step": 4241, "task_loss": 2.954179048538208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8949482790062707, "compression/movement_sparsity/importance_threshold": -0.0007472432040283991, "compression/movement_sparsity/linear_layer_sparsity": 0.8626007496771889, "compression/movement_sparsity/model_sparsity": 0.8329677602554818, "compression_loss": 95.33854675292969, "distillation_loss": 3.9863510131835938, "epoch": 3.59, "learning_rate": 3.5634451019066405e-05, "loss": 99.1871, "step": 4242, "task_loss": 2.7201592922210693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8951362069062149, "compression/movement_sparsity/importance_threshold": -0.0007459064544278003, "compression/movement_sparsity/linear_layer_sparsity": 0.8627781574432754, "compression/movement_sparsity/model_sparsity": 0.8331390735190329, "compression_loss": 95.35811614990234, "distillation_loss": 4.400084018707275, "epoch": 3.59, "learning_rate": 3.56297548605241e-05, "loss": 98.6633, "step": 4243, "task_loss": 2.7363290786743164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8953239105485484, "compression/movement_sparsity/importance_threshold": -0.0007445712999936006, "compression/movement_sparsity/linear_layer_sparsity": 0.8629914331056103, "compression/movement_sparsity/model_sparsity": 0.8333450225062541, "compression_loss": 95.377685546875, "distillation_loss": 3.602400302886963, "epoch": 3.59, "learning_rate": 3.562505870198178e-05, "loss": 99.8674, "step": 4244, "task_loss": 2.8250083923339844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8955113900671567, "compression/movement_sparsity/importance_threshold": -0.0007432377397734594, "compression/movement_sparsity/linear_layer_sparsity": 0.863143156214609, "compression/movement_sparsity/model_sparsity": 0.8334915334597038, "compression_loss": 95.39716339111328, "distillation_loss": 2.9277560710906982, "epoch": 3.59, "learning_rate": 3.5620362543439464e-05, "loss": 99.7592, "step": 4245, "task_loss": 0.8531313538551331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8956986455959246, "compression/movement_sparsity/importance_threshold": -0.0007419057728150422, "compression/movement_sparsity/linear_layer_sparsity": 0.863223322393625, "compression/movement_sparsity/model_sparsity": 0.8335689456838505, "compression_loss": 95.41664123535156, "distillation_loss": 3.5354888439178467, "epoch": 3.59, "learning_rate": 3.561566638489716e-05, "loss": 99.2372, "step": 4246, "task_loss": 1.7557638883590698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8958856772687375, "compression/movement_sparsity/importance_threshold": -0.0007405753981660074, "compression/movement_sparsity/linear_layer_sparsity": 0.8633184295546886, "compression/movement_sparsity/model_sparsity": 0.8336607856213478, "compression_loss": 95.43604278564453, "distillation_loss": 5.067050933837891, "epoch": 3.59, "learning_rate": 3.5610970226354844e-05, "loss": 99.2718, "step": 4247, "task_loss": 1.9731736183166504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8960724852194804, "compression/movement_sparsity/importance_threshold": -0.0007392466148740196, "compression/movement_sparsity/linear_layer_sparsity": 0.8633264902920105, "compression/movement_sparsity/model_sparsity": 0.8336685694475449, "compression_loss": 95.45539093017578, "distillation_loss": 4.1666693687438965, "epoch": 3.59, "learning_rate": 3.560627406781253e-05, "loss": 99.2332, "step": 4248, "task_loss": 2.7883949279785156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8962590695820384, "compression/movement_sparsity/importance_threshold": -0.000737919421986739, "compression/movement_sparsity/linear_layer_sparsity": 0.8633788373879319, "compression/movement_sparsity/model_sparsity": 0.8337191182596829, "compression_loss": 95.47476196289062, "distillation_loss": 4.44405460357666, "epoch": 3.59, "learning_rate": 3.5601577909270216e-05, "loss": 100.0897, "step": 4249, "task_loss": 2.6302919387817383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8964454304902967, "compression/movement_sparsity/importance_threshold": -0.0007365938185518277, "compression/movement_sparsity/linear_layer_sparsity": 0.863463320115632, "compression/movement_sparsity/model_sparsity": 0.8338006987457872, "compression_loss": 95.4940414428711, "distillation_loss": 3.557107925415039, "epoch": 3.59, "learning_rate": 3.559688175072791e-05, "loss": 100.2188, "step": 4250, "task_loss": 1.757676124572754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8966315680781406, "compression/movement_sparsity/importance_threshold": -0.0007352698036169475, "compression/movement_sparsity/linear_layer_sparsity": 0.8635426277545781, "compression/movement_sparsity/model_sparsity": 0.8338772819233568, "compression_loss": 95.51336669921875, "distillation_loss": 4.5386552810668945, "epoch": 3.59, "learning_rate": 3.5592185592185596e-05, "loss": 99.6763, "step": 4251, "task_loss": 2.593977689743042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8968174824794549, "compression/movement_sparsity/importance_threshold": -0.0007339473762297613, "compression/movement_sparsity/linear_layer_sparsity": 0.8636356243379703, "compression/movement_sparsity/model_sparsity": 0.8339670837880183, "compression_loss": 95.53268432617188, "distillation_loss": 3.449946403503418, "epoch": 3.59, "learning_rate": 3.558748943364328e-05, "loss": 99.5308, "step": 4252, "task_loss": 1.8649253845214844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.897003173828125, "compression/movement_sparsity/importance_threshold": -0.0007326265354379302, "compression/movement_sparsity/linear_layer_sparsity": 0.8637397342455989, "compression/movement_sparsity/model_sparsity": 0.8340676172000405, "compression_loss": 95.55191040039062, "distillation_loss": 3.617687702178955, "epoch": 3.59, "learning_rate": 3.558279327510097e-05, "loss": 99.9631, "step": 4253, "task_loss": 4.770885467529297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.897188642258036, "compression/movement_sparsity/importance_threshold": -0.0007313072802891162, "compression/movement_sparsity/linear_layer_sparsity": 0.8638563168325748, "compression/movement_sparsity/model_sparsity": 0.8341801948165037, "compression_loss": 95.57107543945312, "distillation_loss": 3.1359972953796387, "epoch": 3.6, "learning_rate": 3.5578097116558655e-05, "loss": 99.438, "step": 4254, "task_loss": 1.231838583946228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8973738879030729, "compression/movement_sparsity/importance_threshold": -0.0007299896098309803, "compression/movement_sparsity/linear_layer_sparsity": 0.8638913858095918, "compression/movement_sparsity/model_sparsity": 0.8342140590662754, "compression_loss": 95.5902099609375, "distillation_loss": 3.732166290283203, "epoch": 3.6, "learning_rate": 3.557340095801635e-05, "loss": 100.0868, "step": 4255, "task_loss": 1.398179531097412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8975589108971211, "compression/movement_sparsity/importance_threshold": -0.0007286735231111846, "compression/movement_sparsity/linear_layer_sparsity": 0.8640366937164025, "compression/movement_sparsity/model_sparsity": 0.8343543751994678, "compression_loss": 95.60934448242188, "distillation_loss": 5.503180503845215, "epoch": 3.6, "learning_rate": 3.5568704799474034e-05, "loss": 99.7338, "step": 4256, "task_loss": 3.652775764465332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8977437113740656, "compression/movement_sparsity/importance_threshold": -0.0007273590191773918, "compression/movement_sparsity/linear_layer_sparsity": 0.8642184418795082, "compression/movement_sparsity/model_sparsity": 0.8345298797540481, "compression_loss": 95.62844848632812, "distillation_loss": 4.508296966552734, "epoch": 3.6, "learning_rate": 3.556400864093172e-05, "loss": 100.1312, "step": 4257, "task_loss": 2.658705711364746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8979282894677915, "compression/movement_sparsity/importance_threshold": -0.0007260460970772639, "compression/movement_sparsity/linear_layer_sparsity": 0.8643841639613115, "compression/movement_sparsity/model_sparsity": 0.8346899087725207, "compression_loss": 95.64751434326172, "distillation_loss": 4.3883867263793945, "epoch": 3.6, "learning_rate": 3.555931248238941e-05, "loss": 99.7787, "step": 4258, "task_loss": 1.8618927001953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8981126453121838, "compression/movement_sparsity/importance_threshold": -0.0007247347558584621, "compression/movement_sparsity/linear_layer_sparsity": 0.8644849947228403, "compression/movement_sparsity/model_sparsity": 0.8347872756871993, "compression_loss": 95.66659545898438, "distillation_loss": 3.5032224655151367, "epoch": 3.6, "learning_rate": 3.555461632384709e-05, "loss": 99.5797, "step": 4259, "task_loss": 2.0159356594085693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898296779041128, "compression/movement_sparsity/importance_threshold": -0.0007234249945686482, "compression/movement_sparsity/linear_layer_sparsity": 0.8645387011738723, "compression/movement_sparsity/model_sparsity": 0.8348391371564179, "compression_loss": 95.68562316894531, "distillation_loss": 2.963238000869751, "epoch": 3.6, "learning_rate": 3.5549920165304786e-05, "loss": 99.7989, "step": 4260, "task_loss": 2.1831116676330566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898480690788509, "compression/movement_sparsity/importance_threshold": -0.0007221168122554843, "compression/movement_sparsity/linear_layer_sparsity": 0.8646998086028005, "compression/movement_sparsity/model_sparsity": 0.834994710049538, "compression_loss": 95.70468139648438, "distillation_loss": 3.3692617416381836, "epoch": 3.6, "learning_rate": 3.5545224006762466e-05, "loss": 99.5937, "step": 4261, "task_loss": 2.0480234622955322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898664380688212, "compression/movement_sparsity/importance_threshold": -0.0007208102079666323, "compression/movement_sparsity/linear_layer_sparsity": 0.864842958235269, "compression/movement_sparsity/model_sparsity": 0.8351329420517514, "compression_loss": 95.72366333007812, "distillation_loss": 5.954610824584961, "epoch": 3.6, "learning_rate": 3.554052784822016e-05, "loss": 100.0368, "step": 4262, "task_loss": 3.0649073123931885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8988478488741221, "compression/movement_sparsity/importance_threshold": -0.0007195051807497533, "compression/movement_sparsity/linear_layer_sparsity": 0.8648797085199227, "compression/movement_sparsity/model_sparsity": 0.8351684298510702, "compression_loss": 95.74269104003906, "distillation_loss": 5.765130996704102, "epoch": 3.6, "learning_rate": 3.5535831689677845e-05, "loss": 100.7096, "step": 4263, "task_loss": 3.782296657562256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8990310954801245, "compression/movement_sparsity/importance_threshold": -0.0007182017296525102, "compression/movement_sparsity/linear_layer_sparsity": 0.8649936916383539, "compression/movement_sparsity/model_sparsity": 0.8352784972987302, "compression_loss": 95.76166534423828, "distillation_loss": 3.753304958343506, "epoch": 3.6, "learning_rate": 3.553113553113553e-05, "loss": 100.619, "step": 4264, "task_loss": 1.1387360095977783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8992141206401043, "compression/movement_sparsity/importance_threshold": -0.000716899853722564, "compression/movement_sparsity/linear_layer_sparsity": 0.8649877772512066, "compression/movement_sparsity/model_sparsity": 0.8352727860889761, "compression_loss": 95.78064727783203, "distillation_loss": 5.295431613922119, "epoch": 3.6, "learning_rate": 3.5526439372593225e-05, "loss": 100.4319, "step": 4265, "task_loss": 2.787222146987915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8993969244879467, "compression/movement_sparsity/importance_threshold": -0.0007155995520075776, "compression/movement_sparsity/linear_layer_sparsity": 0.8651284228084716, "compression/movement_sparsity/model_sparsity": 0.8354086000386727, "compression_loss": 95.7995834350586, "distillation_loss": 3.6459293365478516, "epoch": 3.61, "learning_rate": 3.5521743214050904e-05, "loss": 100.1359, "step": 4266, "task_loss": 1.2859519720077515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8995795071575368, "compression/movement_sparsity/importance_threshold": -0.000714300823555212, "compression/movement_sparsity/linear_layer_sparsity": 0.8652258909547271, "compression/movement_sparsity/model_sparsity": 0.8355027198542573, "compression_loss": 95.81851959228516, "distillation_loss": 4.66933012008667, "epoch": 3.61, "learning_rate": 3.55170470555086e-05, "loss": 100.4528, "step": 4267, "task_loss": 3.1611311435699463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8997618687827597, "compression/movement_sparsity/importance_threshold": -0.0007130036674131294, "compression/movement_sparsity/linear_layer_sparsity": 0.865322560181751, "compression/movement_sparsity/model_sparsity": 0.8355960681959437, "compression_loss": 95.83747100830078, "distillation_loss": 3.478475332260132, "epoch": 3.61, "learning_rate": 3.5512350896966284e-05, "loss": 100.3259, "step": 4268, "task_loss": 2.1531853675842285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8999440094975006, "compression/movement_sparsity/importance_threshold": -0.0007117080826289906, "compression/movement_sparsity/linear_layer_sparsity": 0.8654633488290276, "compression/movement_sparsity/model_sparsity": 0.8357320203200698, "compression_loss": 95.8563232421875, "distillation_loss": 7.154182434082031, "epoch": 3.61, "learning_rate": 3.550765473842397e-05, "loss": 100.9796, "step": 4269, "task_loss": 3.1148579120635986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9001259294356445, "compression/movement_sparsity/importance_threshold": -0.0007104140682504595, "compression/movement_sparsity/linear_layer_sparsity": 0.8655573589666689, "compression/movement_sparsity/model_sparsity": 0.835822800920274, "compression_loss": 95.875244140625, "distillation_loss": 4.445441246032715, "epoch": 3.61, "learning_rate": 3.5502958579881656e-05, "loss": 99.9588, "step": 4270, "task_loss": 1.9386053085327148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9003076287310768, "compression/movement_sparsity/importance_threshold": -0.0007091216233251962, "compression/movement_sparsity/linear_layer_sparsity": 0.8656521441752063, "compression/movement_sparsity/model_sparsity": 0.8359143299653048, "compression_loss": 95.89413452148438, "distillation_loss": 4.191816329956055, "epoch": 3.61, "learning_rate": 3.549826242133934e-05, "loss": 100.4322, "step": 4271, "task_loss": 2.9691805839538574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9004891075176825, "compression/movement_sparsity/importance_threshold": -0.0007078307469008628, "compression/movement_sparsity/linear_layer_sparsity": 0.8658163876602226, "compression/movement_sparsity/model_sparsity": 0.8360729311813387, "compression_loss": 95.91290283203125, "distillation_loss": 3.0237269401550293, "epoch": 3.61, "learning_rate": 3.5493566262797036e-05, "loss": 99.912, "step": 4272, "task_loss": 2.6129956245422363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9006703659293467, "compression/movement_sparsity/importance_threshold": -0.0007065414380251229, "compression/movement_sparsity/linear_layer_sparsity": 0.8659663936890818, "compression/movement_sparsity/model_sparsity": 0.836217784041634, "compression_loss": 95.93171691894531, "distillation_loss": 2.2251439094543457, "epoch": 3.61, "learning_rate": 3.548887010425472e-05, "loss": 99.7097, "step": 4273, "task_loss": 1.384535312652588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9008514040999546, "compression/movement_sparsity/importance_threshold": -0.0007052536957456359, "compression/movement_sparsity/linear_layer_sparsity": 0.8660423744852576, "compression/movement_sparsity/model_sparsity": 0.8362911546637168, "compression_loss": 95.95042419433594, "distillation_loss": 5.013421058654785, "epoch": 3.61, "learning_rate": 3.548417394571241e-05, "loss": 100.4736, "step": 4274, "task_loss": 2.9494993686676025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9010322221633913, "compression/movement_sparsity/importance_threshold": -0.0007039675191100646, "compression/movement_sparsity/linear_layer_sparsity": 0.8661249731944711, "compression/movement_sparsity/model_sparsity": 0.8363709158531657, "compression_loss": 95.9691390991211, "distillation_loss": 4.651078701019287, "epoch": 3.61, "learning_rate": 3.5479477787170095e-05, "loss": 100.066, "step": 4275, "task_loss": 2.728271007537842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.901212820253542, "compression/movement_sparsity/importance_threshold": -0.000702682907166071, "compression/movement_sparsity/linear_layer_sparsity": 0.8662433682549276, "compression/movement_sparsity/model_sparsity": 0.8364852436790696, "compression_loss": 95.98787689208984, "distillation_loss": 5.252509593963623, "epoch": 3.61, "learning_rate": 3.547478162862778e-05, "loss": 100.3445, "step": 4276, "task_loss": 2.500215768814087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9013931985042917, "compression/movement_sparsity/importance_threshold": -0.000701399858961318, "compression/movement_sparsity/linear_layer_sparsity": 0.8663747725822748, "compression/movement_sparsity/model_sparsity": 0.8366121338635255, "compression_loss": 96.0065689086914, "distillation_loss": 5.140585422515869, "epoch": 3.61, "learning_rate": 3.5470085470085474e-05, "loss": 100.1555, "step": 4277, "task_loss": 2.077906847000122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9015733570495258, "compression/movement_sparsity/importance_threshold": -0.0007001183735434648, "compression/movement_sparsity/linear_layer_sparsity": 0.86654724374296, "compression/movement_sparsity/model_sparsity": 0.8367786801092577, "compression_loss": 96.02525329589844, "distillation_loss": 3.157529354095459, "epoch": 3.62, "learning_rate": 3.5465389311543154e-05, "loss": 99.7624, "step": 4278, "task_loss": 1.618269681930542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9017532960231291, "compression/movement_sparsity/importance_threshold": -0.0006988384499601761, "compression/movement_sparsity/linear_layer_sparsity": 0.866687889300225, "compression/movement_sparsity/model_sparsity": 0.8369144940589544, "compression_loss": 96.04387664794922, "distillation_loss": 2.873100519180298, "epoch": 3.62, "learning_rate": 3.546069315300085e-05, "loss": 99.5371, "step": 4279, "task_loss": 1.8492618799209595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9019330155589871, "compression/movement_sparsity/importance_threshold": -0.0006975600872591121, "compression/movement_sparsity/linear_layer_sparsity": 0.8667401648511406, "compression/movement_sparsity/model_sparsity": 0.8369649737838776, "compression_loss": 96.0624771118164, "distillation_loss": 4.253978252410889, "epoch": 3.62, "learning_rate": 3.5455996994458533e-05, "loss": 100.482, "step": 4280, "task_loss": 1.9532010555267334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9021125157909847, "compression/movement_sparsity/importance_threshold": -0.0006962832844879347, "compression/movement_sparsity/linear_layer_sparsity": 0.8669236777910565, "compression/movement_sparsity/model_sparsity": 0.8371421824897556, "compression_loss": 96.0810546875, "distillation_loss": 5.6727495193481445, "epoch": 3.62, "learning_rate": 3.5451300835916227e-05, "loss": 100.5635, "step": 4281, "task_loss": 2.8349812030792236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9022917968530071, "compression/movement_sparsity/importance_threshold": -0.000695008040694306, "compression/movement_sparsity/linear_layer_sparsity": 0.8670197865822016, "compression/movement_sparsity/model_sparsity": 0.8372349896482596, "compression_loss": 96.09957122802734, "distillation_loss": 5.343999862670898, "epoch": 3.62, "learning_rate": 3.544660467737391e-05, "loss": 101.03, "step": 4282, "task_loss": 2.2804927825927734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9024708588789395, "compression/movement_sparsity/importance_threshold": -0.0006937343549258878, "compression/movement_sparsity/linear_layer_sparsity": 0.8670901391712532, "compression/movement_sparsity/model_sparsity": 0.8373029254094474, "compression_loss": 96.11812591552734, "distillation_loss": 4.515866279602051, "epoch": 3.62, "learning_rate": 3.544190851883159e-05, "loss": 100.861, "step": 4283, "task_loss": 2.2177200317382812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9026497020026669, "compression/movement_sparsity/importance_threshold": -0.0006924622262303431, "compression/movement_sparsity/linear_layer_sparsity": 0.8672478124399022, "compression/movement_sparsity/model_sparsity": 0.8374551821162586, "compression_loss": 96.13661193847656, "distillation_loss": 4.606772422790527, "epoch": 3.62, "learning_rate": 3.5437212360289286e-05, "loss": 100.459, "step": 4284, "task_loss": 3.6668343544006348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9028283263580746, "compression/movement_sparsity/importance_threshold": -0.000691191653655332, "compression/movement_sparsity/linear_layer_sparsity": 0.8673549033894399, "compression/movement_sparsity/model_sparsity": 0.8375585941622293, "compression_loss": 96.15512084960938, "distillation_loss": 3.74487566947937, "epoch": 3.62, "learning_rate": 3.543251620174697e-05, "loss": 99.8355, "step": 4285, "task_loss": 2.2156307697296143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9030067320790476, "compression/movement_sparsity/importance_threshold": -0.0006899226362485173, "compression/movement_sparsity/linear_layer_sparsity": 0.8674447639167437, "compression/movement_sparsity/model_sparsity": 0.837645367703977, "compression_loss": 96.17361450195312, "distillation_loss": 3.800901412963867, "epoch": 3.62, "learning_rate": 3.5427820043204665e-05, "loss": 100.1957, "step": 4286, "task_loss": 2.5456409454345703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9031849192994712, "compression/movement_sparsity/importance_threshold": -0.0006886551730575603, "compression/movement_sparsity/linear_layer_sparsity": 0.8674521807490132, "compression/movement_sparsity/model_sparsity": 0.8376525297452413, "compression_loss": 96.19204711914062, "distillation_loss": 2.6621737480163574, "epoch": 3.62, "learning_rate": 3.5423123884662345e-05, "loss": 100.2569, "step": 4287, "task_loss": 1.5923149585723877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9033628881532304, "compression/movement_sparsity/importance_threshold": -0.0006873892631301236, "compression/movement_sparsity/linear_layer_sparsity": 0.8675844674647655, "compression/movement_sparsity/model_sparsity": 0.8377802720053459, "compression_loss": 96.21049499511719, "distillation_loss": 5.23714542388916, "epoch": 3.62, "learning_rate": 3.541842772612004e-05, "loss": 100.7566, "step": 4288, "task_loss": 2.001322031021118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9035406387742103, "compression/movement_sparsity/importance_threshold": -0.0006861249055138694, "compression/movement_sparsity/linear_layer_sparsity": 0.8677646416377434, "compression/movement_sparsity/model_sparsity": 0.8379542566412014, "compression_loss": 96.22888946533203, "distillation_loss": 4.171845436096191, "epoch": 3.63, "learning_rate": 3.5413731567577724e-05, "loss": 100.4798, "step": 4289, "task_loss": 3.022501230239868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9037181712962963, "compression/movement_sparsity/importance_threshold": -0.0006848620992564568, "compression/movement_sparsity/linear_layer_sparsity": 0.8679322000413625, "compression/movement_sparsity/model_sparsity": 0.8381160588981863, "compression_loss": 96.24727630615234, "distillation_loss": 3.276167392730713, "epoch": 3.63, "learning_rate": 3.540903540903541e-05, "loss": 99.6771, "step": 4290, "task_loss": 2.0082085132598877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9038954858533733, "compression/movement_sparsity/importance_threshold": -0.0006836008434055506, "compression/movement_sparsity/linear_layer_sparsity": 0.8680299901401443, "compression/movement_sparsity/model_sparsity": 0.8382104896062373, "compression_loss": 96.26569366455078, "distillation_loss": 4.706976890563965, "epoch": 3.63, "learning_rate": 3.54043392504931e-05, "loss": 100.4565, "step": 4291, "task_loss": 2.1858999729156494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9040725825793265, "compression/movement_sparsity/importance_threshold": -0.0006823411370088118, "compression/movement_sparsity/linear_layer_sparsity": 0.8681310951575287, "compression/movement_sparsity/model_sparsity": 0.8383081213552392, "compression_loss": 96.28406524658203, "distillation_loss": 5.345820426940918, "epoch": 3.63, "learning_rate": 3.539964309195078e-05, "loss": 100.5279, "step": 4292, "task_loss": 2.712571382522583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9042494616080411, "compression/movement_sparsity/importance_threshold": -0.0006810829791139023, "compression/movement_sparsity/linear_layer_sparsity": 0.8682808984755381, "compression/movement_sparsity/model_sparsity": 0.838452778468426, "compression_loss": 96.30237579345703, "distillation_loss": 4.091446399688721, "epoch": 3.63, "learning_rate": 3.5394946933408476e-05, "loss": 100.4043, "step": 4293, "task_loss": 3.0646886825561523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9044261230734021, "compression/movement_sparsity/importance_threshold": -0.0006798263687684842, "compression/movement_sparsity/linear_layer_sparsity": 0.8684473598557347, "compression/movement_sparsity/model_sparsity": 0.8386135213881178, "compression_loss": 96.32060241699219, "distillation_loss": 4.715254783630371, "epoch": 3.63, "learning_rate": 3.539025077486616e-05, "loss": 100.7685, "step": 4294, "task_loss": 1.8244891166687012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9046025671092948, "compression/movement_sparsity/importance_threshold": -0.0006785713050202185, "compression/movement_sparsity/linear_layer_sparsity": 0.868568020508042, "compression/movement_sparsity/model_sparsity": 0.8387300369758228, "compression_loss": 96.33891296386719, "distillation_loss": 5.351409435272217, "epoch": 3.63, "learning_rate": 3.538555461632385e-05, "loss": 100.9052, "step": 4295, "task_loss": 2.1539011001586914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9047787938496042, "compression/movement_sparsity/importance_threshold": -0.0006773177869167672, "compression/movement_sparsity/linear_layer_sparsity": 0.8686848535025382, "compression/movement_sparsity/model_sparsity": 0.8388428563975376, "compression_loss": 96.35713195800781, "distillation_loss": 4.9772210121154785, "epoch": 3.63, "learning_rate": 3.5380858457781535e-05, "loss": 101.0725, "step": 4296, "task_loss": 2.478421449661255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9049548034282157, "compression/movement_sparsity/importance_threshold": -0.0006760658135057931, "compression/movement_sparsity/linear_layer_sparsity": 0.8687686446285154, "compression/movement_sparsity/model_sparsity": 0.838923769040566, "compression_loss": 96.3753662109375, "distillation_loss": 5.031166076660156, "epoch": 3.63, "learning_rate": 3.537616229923922e-05, "loss": 100.4652, "step": 4297, "task_loss": 2.1664164066314697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9051305959790141, "compression/movement_sparsity/importance_threshold": -0.0006748153838349565, "compression/movement_sparsity/linear_layer_sparsity": 0.8688564065023154, "compression/movement_sparsity/model_sparsity": 0.8390085160240137, "compression_loss": 96.39356994628906, "distillation_loss": 4.09401798248291, "epoch": 3.63, "learning_rate": 3.5371466140696915e-05, "loss": 101.2818, "step": 4298, "task_loss": 2.1398513317108154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9053061716358847, "compression/movement_sparsity/importance_threshold": -0.000673566496951921, "compression/movement_sparsity/linear_layer_sparsity": 0.8689441206794448, "compression/movement_sparsity/model_sparsity": 0.8390932169493184, "compression_loss": 96.41178131103516, "distillation_loss": 5.591272354125977, "epoch": 3.63, "learning_rate": 3.53667699821546e-05, "loss": 100.4545, "step": 4299, "task_loss": 2.2838823795318604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9054815305327127, "compression/movement_sparsity/importance_threshold": -0.000672319151904347, "compression/movement_sparsity/linear_layer_sparsity": 0.8690831445499113, "compression/movement_sparsity/model_sparsity": 0.839227464922147, "compression_loss": 96.42992401123047, "distillation_loss": 4.175162315368652, "epoch": 3.63, "learning_rate": 3.536207382361229e-05, "loss": 100.7709, "step": 4300, "task_loss": 2.1755504608154297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9056566728033831, "compression/movement_sparsity/importance_threshold": -0.000671073347739898, "compression/movement_sparsity/linear_layer_sparsity": 0.8691145170349612, "compression/movement_sparsity/model_sparsity": 0.8392577596658224, "compression_loss": 96.44806671142578, "distillation_loss": 6.890725612640381, "epoch": 3.64, "learning_rate": 3.5357377665069974e-05, "loss": 101.4944, "step": 4301, "task_loss": 3.6747424602508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9058315985817812, "compression/movement_sparsity/importance_threshold": -0.0006698290835062343, "compression/movement_sparsity/linear_layer_sparsity": 0.8691783351801484, "compression/movement_sparsity/model_sparsity": 0.8393193854613947, "compression_loss": 96.46621704101562, "distillation_loss": 4.428505897521973, "epoch": 3.64, "learning_rate": 3.535268150652766e-05, "loss": 100.627, "step": 4302, "task_loss": 3.928126096725464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.906006308001792, "compression/movement_sparsity/importance_threshold": -0.0006685863582510179, "compression/movement_sparsity/linear_layer_sparsity": 0.8693452735270505, "compression/movement_sparsity/model_sparsity": 0.8394805889625183, "compression_loss": 96.48426055908203, "distillation_loss": 4.096075534820557, "epoch": 3.64, "learning_rate": 3.534798534798535e-05, "loss": 100.9728, "step": 4303, "task_loss": 2.0102994441986084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9061808011973007, "compression/movement_sparsity/importance_threshold": -0.0006673451710219117, "compression/movement_sparsity/linear_layer_sparsity": 0.8694869564868998, "compression/movement_sparsity/model_sparsity": 0.8396174046768291, "compression_loss": 96.50232696533203, "distillation_loss": 4.029417514801025, "epoch": 3.64, "learning_rate": 3.534328918944303e-05, "loss": 101.0691, "step": 4304, "task_loss": 1.912031650543213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9063550783021925, "compression/movement_sparsity/importance_threshold": -0.0006661055208665757, "compression/movement_sparsity/linear_layer_sparsity": 0.8696491490150828, "compression/movement_sparsity/model_sparsity": 0.8397740253927064, "compression_loss": 96.52037811279297, "distillation_loss": 4.687166690826416, "epoch": 3.64, "learning_rate": 3.5338593030900726e-05, "loss": 100.4882, "step": 4305, "task_loss": 2.7756474018096924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9065291394503525, "compression/movement_sparsity/importance_threshold": -0.000664867406832673, "compression/movement_sparsity/linear_layer_sparsity": 0.8697870639379592, "compression/movement_sparsity/model_sparsity": 0.8399072025137061, "compression_loss": 96.53839874267578, "distillation_loss": 2.8912041187286377, "epoch": 3.64, "learning_rate": 3.533389687235841e-05, "loss": 99.9438, "step": 4306, "task_loss": 1.1755326986312866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9067029847756658, "compression/movement_sparsity/importance_threshold": -0.0006636308279678663, "compression/movement_sparsity/linear_layer_sparsity": 0.8699860305991313, "compression/movement_sparsity/model_sparsity": 0.8400993340579738, "compression_loss": 96.556396484375, "distillation_loss": 3.7736716270446777, "epoch": 3.64, "learning_rate": 3.5329200713816105e-05, "loss": 100.7755, "step": 4307, "task_loss": 2.50177264213562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9068766144120175, "compression/movement_sparsity/importance_threshold": -0.0006623957833198166, "compression/movement_sparsity/linear_layer_sparsity": 0.8701193547174678, "compression/movement_sparsity/model_sparsity": 0.8402280780826925, "compression_loss": 96.5743179321289, "distillation_loss": 5.024575233459473, "epoch": 3.64, "learning_rate": 3.5324504555273785e-05, "loss": 101.3283, "step": 4308, "task_loss": 2.0596323013305664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9070500284932929, "compression/movement_sparsity/importance_threshold": -0.0006611622719361852, "compression/movement_sparsity/linear_layer_sparsity": 0.8702739992475373, "compression/movement_sparsity/model_sparsity": 0.840377410097412, "compression_loss": 96.59225463867188, "distillation_loss": 3.3621935844421387, "epoch": 3.64, "learning_rate": 3.531980839673147e-05, "loss": 100.8369, "step": 4309, "task_loss": 2.4863851070404053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9072232271533769, "compression/movement_sparsity/importance_threshold": -0.0006599302928646349, "compression/movement_sparsity/linear_layer_sparsity": 0.870369106408601, "compression/movement_sparsity/model_sparsity": 0.8404692500349092, "compression_loss": 96.61013793945312, "distillation_loss": 4.469959259033203, "epoch": 3.64, "learning_rate": 3.5315112238189164e-05, "loss": 100.596, "step": 4310, "task_loss": 2.278229236602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9073962105261548, "compression/movement_sparsity/importance_threshold": -0.0006586998451528266, "compression/movement_sparsity/linear_layer_sparsity": 0.87044844982005, "compression/movement_sparsity/model_sparsity": 0.8405458677560861, "compression_loss": 96.62803649902344, "distillation_loss": 4.333757400512695, "epoch": 3.64, "learning_rate": 3.531041607964685e-05, "loss": 101.8295, "step": 4311, "task_loss": 2.144632339477539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9075689787455119, "compression/movement_sparsity/importance_threshold": -0.0006574709278484225, "compression/movement_sparsity/linear_layer_sparsity": 0.8706340494893022, "compression/movement_sparsity/model_sparsity": 0.840725091505728, "compression_loss": 96.64588165283203, "distillation_loss": 2.989436626434326, "epoch": 3.64, "learning_rate": 3.5305719921104544e-05, "loss": 100.9152, "step": 4312, "task_loss": 1.8431437015533447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.907741531945333, "compression/movement_sparsity/importance_threshold": -0.0006562435399990844, "compression/movement_sparsity/linear_layer_sparsity": 0.8707777834059849, "compression/movement_sparsity/model_sparsity": 0.8408638877201955, "compression_loss": 96.66368103027344, "distillation_loss": 3.5543437004089355, "epoch": 3.65, "learning_rate": 3.530102376256222e-05, "loss": 100.0449, "step": 4313, "task_loss": 2.2528488636016846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9079138702595035, "compression/movement_sparsity/importance_threshold": -0.0006550176806524744, "compression/movement_sparsity/linear_layer_sparsity": 0.8707548770799564, "compression/movement_sparsity/model_sparsity": 0.8408417682969341, "compression_loss": 96.68146514892578, "distillation_loss": 4.056127548217773, "epoch": 3.65, "learning_rate": 3.5296327604019916e-05, "loss": 100.6499, "step": 4314, "task_loss": 2.6276419162750244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9080859938219084, "compression/movement_sparsity/importance_threshold": -0.0006537933488562553, "compression/movement_sparsity/linear_layer_sparsity": 0.8708238106930594, "compression/movement_sparsity/model_sparsity": 0.8409083338283624, "compression_loss": 96.6992416381836, "distillation_loss": 5.374549865722656, "epoch": 3.65, "learning_rate": 3.52916314454776e-05, "loss": 101.475, "step": 4315, "task_loss": 2.969144105911255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9082579027664329, "compression/movement_sparsity/importance_threshold": -0.0006525705436580872, "compression/movement_sparsity/linear_layer_sparsity": 0.8708829664887009, "compression/movement_sparsity/model_sparsity": 0.8409654574404392, "compression_loss": 96.71700286865234, "distillation_loss": 4.06684684753418, "epoch": 3.65, "learning_rate": 3.528693528693529e-05, "loss": 101.1132, "step": 4316, "task_loss": 3.0164380073547363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.908429597226962, "compression/movement_sparsity/importance_threshold": -0.0006513492641056339, "compression/movement_sparsity/linear_layer_sparsity": 0.870990105134909, "compression/movement_sparsity/model_sparsity": 0.8410689155445531, "compression_loss": 96.7347640991211, "distillation_loss": 5.606411933898926, "epoch": 3.65, "learning_rate": 3.5282239128392975e-05, "loss": 101.1857, "step": 4317, "task_loss": 2.957169771194458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9086010773373813, "compression/movement_sparsity/importance_threshold": -0.0006501295092465539, "compression/movement_sparsity/linear_layer_sparsity": 0.8710223003875259, "compression/movement_sparsity/model_sparsity": 0.8411000047911984, "compression_loss": 96.75237274169922, "distillation_loss": 4.649243354797363, "epoch": 3.65, "learning_rate": 3.527754296985066e-05, "loss": 101.06, "step": 4318, "task_loss": 3.1281561851501465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9087723432315753, "compression/movement_sparsity/importance_threshold": -0.0006489112781285127, "compression/movement_sparsity/linear_layer_sparsity": 0.8710487481913423, "compression/movement_sparsity/model_sparsity": 0.8411255440315907, "compression_loss": 96.77008056640625, "distillation_loss": 3.984018564224243, "epoch": 3.65, "learning_rate": 3.5272846811308355e-05, "loss": 101.3247, "step": 4319, "task_loss": 1.1019234657287598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9089433950434296, "compression/movement_sparsity/importance_threshold": -0.0006476945697991695, "compression/movement_sparsity/linear_layer_sparsity": 0.8711473372093556, "compression/movement_sparsity/model_sparsity": 0.8412207462135399, "compression_loss": 96.7877426147461, "distillation_loss": 4.2632269859313965, "epoch": 3.65, "learning_rate": 3.526815065276604e-05, "loss": 100.9255, "step": 4320, "task_loss": 2.3293616771698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9091142329068291, "compression/movement_sparsity/importance_threshold": -0.0006464793833061882, "compression/movement_sparsity/linear_layer_sparsity": 0.8712858244922784, "compression/movement_sparsity/model_sparsity": 0.8413544760322577, "compression_loss": 96.8054428100586, "distillation_loss": 5.105966567993164, "epoch": 3.65, "learning_rate": 3.526345449422373e-05, "loss": 101.1502, "step": 4321, "task_loss": 2.823741912841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9092848569556591, "compression/movement_sparsity/importance_threshold": -0.0006452657176972297, "compression/movement_sparsity/linear_layer_sparsity": 0.8714591303446981, "compression/movement_sparsity/model_sparsity": 0.8415218282954956, "compression_loss": 96.82305908203125, "distillation_loss": 4.155280113220215, "epoch": 3.65, "learning_rate": 3.5258758335681414e-05, "loss": 101.4461, "step": 4322, "task_loss": 1.3473261594772339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9094552673238047, "compression/movement_sparsity/importance_threshold": -0.0006440535720199552, "compression/movement_sparsity/linear_layer_sparsity": 0.8715948512207295, "compression/movement_sparsity/model_sparsity": 0.841652886741909, "compression_loss": 96.84066009521484, "distillation_loss": 5.003501892089844, "epoch": 3.65, "learning_rate": 3.52540621771391e-05, "loss": 100.833, "step": 4323, "task_loss": 2.969928026199341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9096254641451509, "compression/movement_sparsity/importance_threshold": -0.0006428429453220284, "compression/movement_sparsity/linear_layer_sparsity": 0.8717199953600678, "compression/movement_sparsity/model_sparsity": 0.8417737317950728, "compression_loss": 96.85829162597656, "distillation_loss": 3.5958809852600098, "epoch": 3.65, "learning_rate": 3.524936601859679e-05, "loss": 100.9162, "step": 4324, "task_loss": 1.7042759656906128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.909795447553583, "compression/movement_sparsity/importance_threshold": -0.0006416338366511095, "compression/movement_sparsity/linear_layer_sparsity": 0.871797371283857, "compression/movement_sparsity/model_sparsity": 0.8418484496178434, "compression_loss": 96.87586975097656, "distillation_loss": 5.332908630371094, "epoch": 3.66, "learning_rate": 3.524466986005447e-05, "loss": 101.1995, "step": 4325, "task_loss": 2.0948922634124756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9099652176829861, "compression/movement_sparsity/importance_threshold": -0.0006404262450548604, "compression/movement_sparsity/linear_layer_sparsity": 0.871857659875424, "compression/movement_sparsity/model_sparsity": 0.8419066671108207, "compression_loss": 96.89339447021484, "distillation_loss": 5.867154121398926, "epoch": 3.66, "learning_rate": 3.5239973701512166e-05, "loss": 101.4092, "step": 4326, "task_loss": 3.016880989074707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9101347746672453, "compression/movement_sparsity/importance_threshold": -0.0006392201695809441, "compression/movement_sparsity/linear_layer_sparsity": 0.87192072679805, "compression/movement_sparsity/model_sparsity": 0.8419675674906381, "compression_loss": 96.91092681884766, "distillation_loss": 4.770139694213867, "epoch": 3.66, "learning_rate": 3.523527754296985e-05, "loss": 101.1451, "step": 4327, "task_loss": 2.380967378616333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.910304118640246, "compression/movement_sparsity/importance_threshold": -0.0006380156092770208, "compression/movement_sparsity/linear_layer_sparsity": 0.8719082541187029, "compression/movement_sparsity/model_sparsity": 0.8419555232861969, "compression_loss": 96.92851257324219, "distillation_loss": 4.488391876220703, "epoch": 3.66, "learning_rate": 3.523058138442754e-05, "loss": 101.4377, "step": 4328, "task_loss": 1.9917759895324707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9104732497358727, "compression/movement_sparsity/importance_threshold": -0.0006368125631907549, "compression/movement_sparsity/linear_layer_sparsity": 0.8719756972108513, "compression/movement_sparsity/model_sparsity": 0.8420206495006508, "compression_loss": 96.94599151611328, "distillation_loss": 4.459288597106934, "epoch": 3.66, "learning_rate": 3.522588522588523e-05, "loss": 101.1848, "step": 4329, "task_loss": 1.5469865798950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9106421680880112, "compression/movement_sparsity/importance_threshold": -0.000635611030369806, "compression/movement_sparsity/linear_layer_sparsity": 0.8721258105572192, "compression/movement_sparsity/model_sparsity": 0.8421656059917684, "compression_loss": 96.96351623535156, "distillation_loss": 3.6506779193878174, "epoch": 3.66, "learning_rate": 3.522118906734291e-05, "loss": 101.8771, "step": 4330, "task_loss": 2.497293472290039 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9108108738305466, "compression/movement_sparsity/importance_threshold": -0.000634411009861835, "compression/movement_sparsity/linear_layer_sparsity": 0.8722129046776316, "compression/movement_sparsity/model_sparsity": 0.8422497081612117, "compression_loss": 96.98101043701172, "distillation_loss": 4.599941730499268, "epoch": 3.66, "learning_rate": 3.5216492908800604e-05, "loss": 101.4115, "step": 4331, "task_loss": 1.6861639022827148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9109793670973635, "compression/movement_sparsity/importance_threshold": -0.0006332125007145074, "compression/movement_sparsity/linear_layer_sparsity": 0.872372461964767, "compression/movement_sparsity/model_sparsity": 0.8424037841646784, "compression_loss": 96.99842834472656, "distillation_loss": 4.413607120513916, "epoch": 3.66, "learning_rate": 3.521179675025829e-05, "loss": 101.0354, "step": 4332, "task_loss": 2.0775222778320312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9111476480223475, "compression/movement_sparsity/importance_threshold": -0.0006320155019754825, "compression/movement_sparsity/linear_layer_sparsity": 0.872528072352415, "compression/movement_sparsity/model_sparsity": 0.8425540488567972, "compression_loss": 97.01582336425781, "distillation_loss": 4.3754401206970215, "epoch": 3.66, "learning_rate": 3.520710059171598e-05, "loss": 100.9529, "step": 4333, "task_loss": 2.2150676250457764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9113157167393836, "compression/movement_sparsity/importance_threshold": -0.0006308200126924225, "compression/movement_sparsity/linear_layer_sparsity": 0.8725821961493142, "compression/movement_sparsity/model_sparsity": 0.8426063133347685, "compression_loss": 97.0331802368164, "distillation_loss": 6.584428787231445, "epoch": 3.66, "learning_rate": 3.5202404433173664e-05, "loss": 101.5197, "step": 4334, "task_loss": 3.1336729526519775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9114835733823571, "compression/movement_sparsity/importance_threshold": -0.0006296260319129891, "compression/movement_sparsity/linear_layer_sparsity": 0.8726590951063979, "compression/movement_sparsity/model_sparsity": 0.8426805705761076, "compression_loss": 97.05058288574219, "distillation_loss": 4.901535987854004, "epoch": 3.66, "learning_rate": 3.519770827463135e-05, "loss": 101.3561, "step": 4335, "task_loss": 2.768127202987671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9116512180851528, "compression/movement_sparsity/importance_threshold": -0.0006284335586848443, "compression/movement_sparsity/linear_layer_sparsity": 0.8727987509577492, "compression/movement_sparsity/model_sparsity": 0.8428154288193332, "compression_loss": 97.06786346435547, "distillation_loss": 6.3475236892700195, "epoch": 3.66, "learning_rate": 3.519301211608904e-05, "loss": 101.4688, "step": 4336, "task_loss": 2.8653011322021484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9118186509816562, "compression/movement_sparsity/importance_threshold": -0.0006272425920556511, "compression/movement_sparsity/linear_layer_sparsity": 0.8729990531256964, "compression/movement_sparsity/model_sparsity": 0.84300884999161, "compression_loss": 97.08513641357422, "distillation_loss": 3.3743069171905518, "epoch": 3.67, "learning_rate": 3.518831595754673e-05, "loss": 100.6516, "step": 4337, "task_loss": 1.3316086530685425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9119858722057521, "compression/movement_sparsity/importance_threshold": -0.0006260531310730696, "compression/movement_sparsity/linear_layer_sparsity": 0.8730721363491366, "compression/movement_sparsity/model_sparsity": 0.8430794225814947, "compression_loss": 97.10237884521484, "distillation_loss": 5.050867080688477, "epoch": 3.67, "learning_rate": 3.5183619799004416e-05, "loss": 101.0622, "step": 4338, "task_loss": 2.990431308746338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9121528818913259, "compression/movement_sparsity/importance_threshold": -0.0006248651747847635, "compression/movement_sparsity/linear_layer_sparsity": 0.8731772598110144, "compression/movement_sparsity/model_sparsity": 0.8431809347290593, "compression_loss": 97.11962890625, "distillation_loss": 5.664292335510254, "epoch": 3.67, "learning_rate": 3.51789236404621e-05, "loss": 102.116, "step": 4339, "task_loss": 2.8592259883880615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9123196801722628, "compression/movement_sparsity/importance_threshold": -0.0006236787222383914, "compression/movement_sparsity/linear_layer_sparsity": 0.8733578513298594, "compression/movement_sparsity/model_sparsity": 0.8433553223736676, "compression_loss": 97.13687896728516, "distillation_loss": 4.6449174880981445, "epoch": 3.67, "learning_rate": 3.517422748191979e-05, "loss": 101.411, "step": 4340, "task_loss": 1.9914647340774536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9124862671824476, "compression/movement_sparsity/importance_threshold": -0.0006224937724816195, "compression/movement_sparsity/linear_layer_sparsity": 0.8733796606324655, "compression/movement_sparsity/model_sparsity": 0.8433763824596359, "compression_loss": 97.15402221679688, "distillation_loss": 4.491488933563232, "epoch": 3.67, "learning_rate": 3.516953132337748e-05, "loss": 101.4238, "step": 4341, "task_loss": 2.7758970260620117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9126526430557658, "compression/movement_sparsity/importance_threshold": -0.0006213103245621071, "compression/movement_sparsity/linear_layer_sparsity": 0.8734265226112744, "compression/movement_sparsity/model_sparsity": 0.8434216345853084, "compression_loss": 97.17120361328125, "distillation_loss": 4.414536476135254, "epoch": 3.67, "learning_rate": 3.516483516483517e-05, "loss": 101.916, "step": 4342, "task_loss": 1.8419452905654907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9128188079261023, "compression/movement_sparsity/importance_threshold": -0.0006201283775275164, "compression/movement_sparsity/linear_layer_sparsity": 0.87351017064724, "compression/movement_sparsity/model_sparsity": 0.8435024090539072, "compression_loss": 97.18831634521484, "distillation_loss": 3.1899051666259766, "epoch": 3.67, "learning_rate": 3.5160139006292854e-05, "loss": 101.7864, "step": 4343, "task_loss": 2.91369366645813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9129847619273423, "compression/movement_sparsity/importance_threshold": -0.0006189479304255082, "compression/movement_sparsity/linear_layer_sparsity": 0.8735364634368771, "compression/movement_sparsity/model_sparsity": 0.8435277986053341, "compression_loss": 97.20537567138672, "distillation_loss": 3.840334415435791, "epoch": 3.67, "learning_rate": 3.515544284775054e-05, "loss": 101.5536, "step": 4344, "task_loss": 2.318784475326538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.913150505193371, "compression/movement_sparsity/importance_threshold": -0.0006177689823037465, "compression/movement_sparsity/linear_layer_sparsity": 0.8736997529884826, "compression/movement_sparsity/model_sparsity": 0.8436854786585045, "compression_loss": 97.222412109375, "distillation_loss": 4.392070770263672, "epoch": 3.67, "learning_rate": 3.5150746689208234e-05, "loss": 101.6293, "step": 4345, "task_loss": 2.812173843383789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9133160378580736, "compression/movement_sparsity/importance_threshold": -0.0006165915322098913, "compression/movement_sparsity/linear_layer_sparsity": 0.8737563570122501, "compression/movement_sparsity/model_sparsity": 0.8437401381599212, "compression_loss": 97.23946380615234, "distillation_loss": 5.413187026977539, "epoch": 3.67, "learning_rate": 3.514605053066592e-05, "loss": 101.6677, "step": 4346, "task_loss": 2.908686399459839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9134813600553351, "compression/movement_sparsity/importance_threshold": -0.0006154155791916055, "compression/movement_sparsity/linear_layer_sparsity": 0.8738120309509418, "compression/movement_sparsity/model_sparsity": 0.8437938995275459, "compression_loss": 97.25646209716797, "distillation_loss": 4.192494869232178, "epoch": 3.67, "learning_rate": 3.51413543721236e-05, "loss": 100.5218, "step": 4347, "task_loss": 2.363858699798584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9136464719190405, "compression/movement_sparsity/importance_threshold": -0.000614241122296552, "compression/movement_sparsity/linear_layer_sparsity": 0.8739405257813859, "compression/movement_sparsity/model_sparsity": 0.8439179801652679, "compression_loss": 97.27347564697266, "distillation_loss": 4.4419050216674805, "epoch": 3.67, "learning_rate": 3.513665821358129e-05, "loss": 101.8099, "step": 4348, "task_loss": 2.151049852371216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9138113735830754, "compression/movement_sparsity/importance_threshold": -0.0006130681605723892, "compression/movement_sparsity/linear_layer_sparsity": 0.8740349771132296, "compression/movement_sparsity/model_sparsity": 0.8440091868032965, "compression_loss": 97.2903823852539, "distillation_loss": 3.8331332206726074, "epoch": 3.68, "learning_rate": 3.513196205503898e-05, "loss": 100.9744, "step": 4349, "task_loss": 2.2838525772094727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9139760651813245, "compression/movement_sparsity/importance_threshold": -0.0006118966930667826, "compression/movement_sparsity/linear_layer_sparsity": 0.8740858336881966, "compression/movement_sparsity/model_sparsity": 0.8440582962984602, "compression_loss": 97.30725860595703, "distillation_loss": 4.197651386260986, "epoch": 3.68, "learning_rate": 3.512726589649667e-05, "loss": 101.4281, "step": 4350, "task_loss": 2.6501739025115967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9141405468476731, "compression/movement_sparsity/importance_threshold": -0.0006107267188273924, "compression/movement_sparsity/linear_layer_sparsity": 0.8741158706664713, "compression/movement_sparsity/model_sparsity": 0.8440873014141266, "compression_loss": 97.3241195678711, "distillation_loss": 3.5685415267944336, "epoch": 3.68, "learning_rate": 3.512256973795435e-05, "loss": 102.0134, "step": 4351, "task_loss": 1.8292555809020996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9143048187160064, "compression/movement_sparsity/importance_threshold": -0.0006095582369018806, "compression/movement_sparsity/linear_layer_sparsity": 0.8741842319195277, "compression/movement_sparsity/model_sparsity": 0.8441533142478367, "compression_loss": 97.34100341796875, "distillation_loss": 6.293216228485107, "epoch": 3.68, "learning_rate": 3.5117873579412045e-05, "loss": 102.1354, "step": 4352, "task_loss": 3.4577300548553467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9144688809202095, "compression/movement_sparsity/importance_threshold": -0.0006083912463379091, "compression/movement_sparsity/linear_layer_sparsity": 0.8743167690428003, "compression/movement_sparsity/model_sparsity": 0.8442812983131931, "compression_loss": 97.35787200927734, "distillation_loss": 4.066761493682861, "epoch": 3.68, "learning_rate": 3.511317742086973e-05, "loss": 101.3858, "step": 4353, "task_loss": 2.1670775413513184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9146327335941675, "compression/movement_sparsity/importance_threshold": -0.000607225746183139, "compression/movement_sparsity/linear_layer_sparsity": 0.8744189353111044, "compression/movement_sparsity/model_sparsity": 0.8443799548558807, "compression_loss": 97.37471771240234, "distillation_loss": 5.733152389526367, "epoch": 3.68, "learning_rate": 3.510848126232742e-05, "loss": 101.7128, "step": 4354, "task_loss": 2.686049699783325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9147963768717657, "compression/movement_sparsity/importance_threshold": -0.0006060617354852324, "compression/movement_sparsity/linear_layer_sparsity": 0.8745584838449469, "compression/movement_sparsity/model_sparsity": 0.8445147094682842, "compression_loss": 97.39154052734375, "distillation_loss": 3.273974895477295, "epoch": 3.68, "learning_rate": 3.5103785103785104e-05, "loss": 102.212, "step": 4355, "task_loss": 1.6871647834777832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.914959810886889, "compression/movement_sparsity/importance_threshold": -0.0006048992132918528, "compression/movement_sparsity/linear_layer_sparsity": 0.8747142253984389, "compression/movement_sparsity/model_sparsity": 0.8446651008202967, "compression_loss": 97.40833282470703, "distillation_loss": 5.929620742797852, "epoch": 3.68, "learning_rate": 3.509908894524279e-05, "loss": 102.2862, "step": 4356, "task_loss": 3.626939296722412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9151230357734226, "compression/movement_sparsity/importance_threshold": -0.0006037381786506615, "compression/movement_sparsity/linear_layer_sparsity": 0.8747404943397407, "compression/movement_sparsity/model_sparsity": 0.8446904673426521, "compression_loss": 97.42509460449219, "distillation_loss": 5.587625503540039, "epoch": 3.68, "learning_rate": 3.509439278670048e-05, "loss": 101.7402, "step": 4357, "task_loss": 2.9942009449005127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9152860516652519, "compression/movement_sparsity/importance_threshold": -0.0006025786306093168, "compression/movement_sparsity/linear_layer_sparsity": 0.8748271114934476, "compression/movement_sparsity/model_sparsity": 0.8447741089306636, "compression_loss": 97.44182586669922, "distillation_loss": 4.631424903869629, "epoch": 3.68, "learning_rate": 3.508969662815817e-05, "loss": 101.4022, "step": 4358, "task_loss": 2.075044631958008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9154488586962616, "compression/movement_sparsity/importance_threshold": -0.0006014205682154869, "compression/movement_sparsity/linear_layer_sparsity": 0.8748879009000553, "compression/movement_sparsity/model_sparsity": 0.8448328100341441, "compression_loss": 97.45854949951172, "distillation_loss": 5.170459270477295, "epoch": 3.68, "learning_rate": 3.5085000469615856e-05, "loss": 101.7259, "step": 4359, "task_loss": 4.450490474700928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9156114570003372, "compression/movement_sparsity/importance_threshold": -0.0006002639905168285, "compression/movement_sparsity/linear_layer_sparsity": 0.8748714217003825, "compression/movement_sparsity/model_sparsity": 0.8448168969456761, "compression_loss": 97.47523498535156, "distillation_loss": 3.6871485710144043, "epoch": 3.69, "learning_rate": 3.508030431107354e-05, "loss": 101.7185, "step": 4360, "task_loss": 1.749078631401062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9157738467113636, "compression/movement_sparsity/importance_threshold": -0.0005991088965610052, "compression/movement_sparsity/linear_layer_sparsity": 0.8749352875422401, "compression/movement_sparsity/model_sparsity": 0.8448785687993917, "compression_loss": 97.49190521240234, "distillation_loss": 5.190817832946777, "epoch": 3.69, "learning_rate": 3.507560815253123e-05, "loss": 102.4636, "step": 4361, "task_loss": 2.813035011291504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9159360279632263, "compression/movement_sparsity/importance_threshold": -0.0005979552853956783, "compression/movement_sparsity/linear_layer_sparsity": 0.8750923049816692, "compression/movement_sparsity/model_sparsity": 0.8450301922067341, "compression_loss": 97.50859832763672, "distillation_loss": 6.625268936157227, "epoch": 3.69, "learning_rate": 3.507091199398892e-05, "loss": 103.3119, "step": 4362, "task_loss": 4.174845218658447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9160980008898099, "compression/movement_sparsity/importance_threshold": -0.0005968031560685122, "compression/movement_sparsity/linear_layer_sparsity": 0.8752116182030336, "compression/movement_sparsity/model_sparsity": 0.8451454066518944, "compression_loss": 97.52523040771484, "distillation_loss": 6.0885162353515625, "epoch": 3.69, "learning_rate": 3.506621583544661e-05, "loss": 102.7816, "step": 4363, "task_loss": 3.1689977645874023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.916259765625, "compression/movement_sparsity/importance_threshold": -0.0005956525076271646, "compression/movement_sparsity/linear_layer_sparsity": 0.8753402084268188, "compression/movement_sparsity/model_sparsity": 0.8452695794059026, "compression_loss": 97.5418701171875, "distillation_loss": 3.424043655395508, "epoch": 3.69, "learning_rate": 3.5061519676904294e-05, "loss": 101.7819, "step": 4364, "task_loss": 1.1217650175094604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9164213223026816, "compression/movement_sparsity/importance_threshold": -0.0005945033391193, "compression/movement_sparsity/linear_layer_sparsity": 0.8753507970876795, "compression/movement_sparsity/model_sparsity": 0.8452798043136882, "compression_loss": 97.55848693847656, "distillation_loss": 6.050121307373047, "epoch": 3.69, "learning_rate": 3.505682351836198e-05, "loss": 102.1655, "step": 4365, "task_loss": 3.3753910064697266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9165826710567396, "compression/movement_sparsity/importance_threshold": -0.0005933556495925805, "compression/movement_sparsity/linear_layer_sparsity": 0.8753784373082594, "compression/movement_sparsity/model_sparsity": 0.84530649500766, "compression_loss": 97.5750732421875, "distillation_loss": 4.465888023376465, "epoch": 3.69, "learning_rate": 3.505212735981967e-05, "loss": 101.8195, "step": 4366, "task_loss": 1.8745023012161255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9167438120210597, "compression/movement_sparsity/importance_threshold": -0.0005922094380946662, "compression/movement_sparsity/linear_layer_sparsity": 0.8753877620073506, "compression/movement_sparsity/model_sparsity": 0.8453154993746513, "compression_loss": 97.59162902832031, "distillation_loss": 4.401338577270508, "epoch": 3.69, "learning_rate": 3.504743120127736e-05, "loss": 101.906, "step": 4367, "task_loss": 3.5523393154144287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9169047453295265, "compression/movement_sparsity/importance_threshold": -0.0005910647036732209, "compression/movement_sparsity/linear_layer_sparsity": 0.875502555969181, "compression/movement_sparsity/model_sparsity": 0.8454263498107454, "compression_loss": 97.60818481445312, "distillation_loss": 3.768784284591675, "epoch": 3.69, "learning_rate": 3.504273504273504e-05, "loss": 101.6686, "step": 4368, "task_loss": 2.9519572257995605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9170654711160253, "compression/movement_sparsity/importance_threshold": -0.0005899214453759048, "compression/movement_sparsity/linear_layer_sparsity": 0.8756536232489598, "compression/movement_sparsity/model_sparsity": 0.8455722274647264, "compression_loss": 97.62469482421875, "distillation_loss": 6.250739574432373, "epoch": 3.69, "learning_rate": 3.503803888419273e-05, "loss": 102.1979, "step": 4369, "task_loss": 3.6798715591430664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9172259895144413, "compression/movement_sparsity/importance_threshold": -0.0005887796622503806, "compression/movement_sparsity/linear_layer_sparsity": 0.8758191426199132, "compression/movement_sparsity/model_sparsity": 0.8457320607360904, "compression_loss": 97.64122772216797, "distillation_loss": 4.940618515014648, "epoch": 3.69, "learning_rate": 3.503334272565042e-05, "loss": 102.1586, "step": 4370, "task_loss": 1.8774150609970093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9173863006586598, "compression/movement_sparsity/importance_threshold": -0.0005876393533443096, "compression/movement_sparsity/linear_layer_sparsity": 0.8758655634045197, "compression/movement_sparsity/model_sparsity": 0.8457768868239386, "compression_loss": 97.65766906738281, "distillation_loss": 5.458067417144775, "epoch": 3.69, "learning_rate": 3.5028646567108106e-05, "loss": 101.6655, "step": 4371, "task_loss": 1.740576982498169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9175464046825655, "compression/movement_sparsity/importance_threshold": -0.0005865005177053554, "compression/movement_sparsity/linear_layer_sparsity": 0.8760063520517963, "compression/movement_sparsity/model_sparsity": 0.8459128389480647, "compression_loss": 97.67411804199219, "distillation_loss": 3.236703872680664, "epoch": 3.7, "learning_rate": 3.502395040856579e-05, "loss": 101.6583, "step": 4372, "task_loss": 1.53750479221344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.917706301720044, "compression/movement_sparsity/importance_threshold": -0.0005853631543811773, "compression/movement_sparsity/linear_layer_sparsity": 0.8761394138384448, "compression/movement_sparsity/model_sparsity": 0.8460413296529959, "compression_loss": 97.69056701660156, "distillation_loss": 4.758967399597168, "epoch": 3.7, "learning_rate": 3.501925425002348e-05, "loss": 101.9402, "step": 4373, "task_loss": 2.8947057723999023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9178659919049802, "compression/movement_sparsity/importance_threshold": -0.0005842272624194383, "compression/movement_sparsity/linear_layer_sparsity": 0.8762316949717789, "compression/movement_sparsity/model_sparsity": 0.8461304406455099, "compression_loss": 97.70694732666016, "distillation_loss": 4.31240701675415, "epoch": 3.7, "learning_rate": 3.501455809148117e-05, "loss": 101.9564, "step": 4374, "task_loss": 2.6558704376220703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9180254753712593, "compression/movement_sparsity/importance_threshold": -0.0005830928408678002, "compression/movement_sparsity/linear_layer_sparsity": 0.8763191587413879, "compression/movement_sparsity/model_sparsity": 0.8462148997655629, "compression_loss": 97.72334289550781, "distillation_loss": 3.069674253463745, "epoch": 3.7, "learning_rate": 3.500986193293886e-05, "loss": 101.8276, "step": 4375, "task_loss": 3.4361276626586914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9181847522527664, "compression/movement_sparsity/importance_threshold": -0.0005819598887739251, "compression/movement_sparsity/linear_layer_sparsity": 0.8764759853941347, "compression/movement_sparsity/model_sparsity": 0.8463663389403328, "compression_loss": 97.73970031738281, "distillation_loss": 3.569650411605835, "epoch": 3.7, "learning_rate": 3.500516577439655e-05, "loss": 101.6328, "step": 4376, "task_loss": 1.849444031715393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9183438226833867, "compression/movement_sparsity/importance_threshold": -0.0005808284051854748, "compression/movement_sparsity/linear_layer_sparsity": 0.8766166786480704, "compression/movement_sparsity/model_sparsity": 0.8465021989481725, "compression_loss": 97.756103515625, "distillation_loss": 3.6950435638427734, "epoch": 3.7, "learning_rate": 3.500046961585423e-05, "loss": 101.8142, "step": 4377, "task_loss": 2.389533281326294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9185026867970052, "compression/movement_sparsity/importance_threshold": -0.0005796983891501115, "compression/movement_sparsity/linear_layer_sparsity": 0.8767299701647786, "compression/movement_sparsity/model_sparsity": 0.8466115985527565, "compression_loss": 97.77243041992188, "distillation_loss": 4.350085258483887, "epoch": 3.7, "learning_rate": 3.499577345731192e-05, "loss": 102.2644, "step": 4378, "task_loss": 2.105607748031616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9186613447275072, "compression/movement_sparsity/importance_threshold": -0.000578569839715496, "compression/movement_sparsity/linear_layer_sparsity": 0.8768640455056763, "compression/movement_sparsity/model_sparsity": 0.8467410679932302, "compression_loss": 97.78874969482422, "distillation_loss": 5.529770851135254, "epoch": 3.7, "learning_rate": 3.499107729876961e-05, "loss": 102.616, "step": 4379, "task_loss": 2.0812699794769287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9188197966087779, "compression/movement_sparsity/importance_threshold": -0.0005774427559292906, "compression/movement_sparsity/linear_layer_sparsity": 0.8768658460549893, "compression/movement_sparsity/model_sparsity": 0.8467428066881352, "compression_loss": 97.8050537109375, "distillation_loss": 4.387889862060547, "epoch": 3.7, "learning_rate": 3.4986381140227296e-05, "loss": 102.3888, "step": 4380, "task_loss": 2.3309402465820312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9189780425747021, "compression/movement_sparsity/importance_threshold": -0.0005763171368391587, "compression/movement_sparsity/linear_layer_sparsity": 0.876930069621876, "compression/movement_sparsity/model_sparsity": 0.8468048239779246, "compression_loss": 97.82130432128906, "distillation_loss": 3.8322865962982178, "epoch": 3.7, "learning_rate": 3.498168498168498e-05, "loss": 102.3788, "step": 4381, "task_loss": 2.2923452854156494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9191360827591653, "compression/movement_sparsity/importance_threshold": -0.0005751929814927599, "compression/movement_sparsity/linear_layer_sparsity": 0.8770521254017968, "compression/movement_sparsity/model_sparsity": 0.8469226867663175, "compression_loss": 97.83758544921875, "distillation_loss": 4.734604835510254, "epoch": 3.7, "learning_rate": 3.497698882314267e-05, "loss": 102.6717, "step": 4382, "task_loss": 1.629370093345642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9192939172960525, "compression/movement_sparsity/importance_threshold": -0.000574070288937756, "compression/movement_sparsity/linear_layer_sparsity": 0.877041703679283, "compression/movement_sparsity/model_sparsity": 0.8469126230620331, "compression_loss": 97.85385131835938, "distillation_loss": 3.831840991973877, "epoch": 3.7, "learning_rate": 3.497229266460036e-05, "loss": 101.5783, "step": 4383, "task_loss": 2.1000192165374756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9194515463192489, "compression/movement_sparsity/importance_threshold": -0.0005729490582218107, "compression/movement_sparsity/linear_layer_sparsity": 0.8771230861233977, "compression/movement_sparsity/model_sparsity": 0.8469912097688308, "compression_loss": 97.87002563476562, "distillation_loss": 6.437242031097412, "epoch": 3.71, "learning_rate": 3.496759650605805e-05, "loss": 102.422, "step": 4384, "task_loss": 3.025775909423828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9196089699626395, "compression/movement_sparsity/importance_threshold": -0.0005718292883925844, "compression/movement_sparsity/linear_layer_sparsity": 0.8772355906450422, "compression/movement_sparsity/model_sparsity": 0.8470998494140524, "compression_loss": 97.88623809814453, "distillation_loss": 4.815033912658691, "epoch": 3.71, "learning_rate": 3.4962900347515735e-05, "loss": 102.6792, "step": 4385, "task_loss": 2.6405892372131348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9197661883601096, "compression/movement_sparsity/importance_threshold": -0.0005707109784977397, "compression/movement_sparsity/linear_layer_sparsity": 0.8772598205536782, "compression/movement_sparsity/model_sparsity": 0.8471232469507869, "compression_loss": 97.9024429321289, "distillation_loss": 3.8567111492156982, "epoch": 3.71, "learning_rate": 3.495820418897342e-05, "loss": 102.0558, "step": 4386, "task_loss": 1.9327900409698486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9199232016455442, "compression/movement_sparsity/importance_threshold": -0.0005695941275849388, "compression/movement_sparsity/linear_layer_sparsity": 0.8772965112174937, "compression/movement_sparsity/model_sparsity": 0.8471586771774267, "compression_loss": 97.91856384277344, "distillation_loss": 4.767773628234863, "epoch": 3.71, "learning_rate": 3.495350803043111e-05, "loss": 102.4685, "step": 4387, "task_loss": 2.5777482986450195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9200800099528286, "compression/movement_sparsity/importance_threshold": -0.0005684787347018418, "compression/movement_sparsity/linear_layer_sparsity": 0.8774660728812758, "compression/movement_sparsity/model_sparsity": 0.847322413876425, "compression_loss": 97.93461608886719, "distillation_loss": 4.752980709075928, "epoch": 3.71, "learning_rate": 3.49488118718888e-05, "loss": 102.3886, "step": 4388, "task_loss": 1.986894130706787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9202366134158478, "compression/movement_sparsity/importance_threshold": -0.0005673647988961123, "compression/movement_sparsity/linear_layer_sparsity": 0.8774665140754783, "compression/movement_sparsity/model_sparsity": 0.8473228399142494, "compression_loss": 97.95079040527344, "distillation_loss": 4.827150344848633, "epoch": 3.71, "learning_rate": 3.494411571334649e-05, "loss": 101.9577, "step": 4389, "task_loss": 3.572648048400879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9203930121684869, "compression/movement_sparsity/importance_threshold": -0.0005662523192154125, "compression/movement_sparsity/linear_layer_sparsity": 0.8775551225651803, "compression/movement_sparsity/model_sparsity": 0.8474084044297386, "compression_loss": 97.96682739257812, "distillation_loss": 4.841347694396973, "epoch": 3.71, "learning_rate": 3.493941955480417e-05, "loss": 102.2869, "step": 4390, "task_loss": 2.744504928588867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9205492063446312, "compression/movement_sparsity/importance_threshold": -0.0005651412947074025, "compression/movement_sparsity/linear_layer_sparsity": 0.8776647533624244, "compression/movement_sparsity/model_sparsity": 0.8475142690718337, "compression_loss": 97.98283386230469, "distillation_loss": 6.234444618225098, "epoch": 3.71, "learning_rate": 3.493472339626186e-05, "loss": 102.8008, "step": 4391, "task_loss": 2.661376953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9207051960781659, "compression/movement_sparsity/importance_threshold": -0.0005640317244197442, "compression/movement_sparsity/linear_layer_sparsity": 0.8777331146154809, "compression/movement_sparsity/model_sparsity": 0.8475802819055438, "compression_loss": 97.99890899658203, "distillation_loss": 5.531033992767334, "epoch": 3.71, "learning_rate": 3.4930027237719546e-05, "loss": 102.1162, "step": 4392, "task_loss": 3.0056354999542236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9208609815029759, "compression/movement_sparsity/importance_threshold": -0.0005629236074001015, "compression/movement_sparsity/linear_layer_sparsity": 0.8777977078315644, "compression/movement_sparsity/model_sparsity": 0.8476426561459428, "compression_loss": 98.01494598388672, "distillation_loss": 5.426926136016846, "epoch": 3.71, "learning_rate": 3.492533107917724e-05, "loss": 102.4858, "step": 4393, "task_loss": 2.7949817180633545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9210165627529465, "compression/movement_sparsity/importance_threshold": -0.0005618169426961335, "compression/movement_sparsity/linear_layer_sparsity": 0.8778658782979386, "compression/movement_sparsity/model_sparsity": 0.8477084847470802, "compression_loss": 98.0309066772461, "distillation_loss": 3.2543997764587402, "epoch": 3.71, "learning_rate": 3.492063492063492e-05, "loss": 101.9283, "step": 4394, "task_loss": 2.918078899383545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9211719399619628, "compression/movement_sparsity/importance_threshold": -0.000560711729355505, "compression/movement_sparsity/linear_layer_sparsity": 0.8779222796108562, "compression/movement_sparsity/model_sparsity": 0.8477629485013883, "compression_loss": 98.0468978881836, "distillation_loss": 4.309091567993164, "epoch": 3.71, "learning_rate": 3.491593876209261e-05, "loss": 101.8686, "step": 4395, "task_loss": 2.7443108558654785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9213271132639099, "compression/movement_sparsity/importance_threshold": -0.0005596079664258753, "compression/movement_sparsity/linear_layer_sparsity": 0.877986240846055, "compression/movement_sparsity/model_sparsity": 0.8478247124713902, "compression_loss": 98.0628433227539, "distillation_loss": 3.4581096172332764, "epoch": 3.72, "learning_rate": 3.49112426035503e-05, "loss": 102.4653, "step": 4396, "task_loss": 1.2475072145462036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.921482082792673, "compression/movement_sparsity/importance_threshold": -0.0005585056529549088, "compression/movement_sparsity/linear_layer_sparsity": 0.8781514501886498, "compression/movement_sparsity/model_sparsity": 0.8479842463648236, "compression_loss": 98.07878112792969, "distillation_loss": 3.426868200302124, "epoch": 3.72, "learning_rate": 3.4906546445007984e-05, "loss": 101.8421, "step": 4397, "task_loss": 2.5106265544891357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9216368486821372, "compression/movement_sparsity/importance_threshold": -0.0005574047879902642, "compression/movement_sparsity/linear_layer_sparsity": 0.8782475709039625, "compression/movement_sparsity/model_sparsity": 0.8480770650378634, "compression_loss": 98.09471893310547, "distillation_loss": 2.6400020122528076, "epoch": 3.72, "learning_rate": 3.490185028646567e-05, "loss": 101.4421, "step": 4398, "task_loss": 1.89017653465271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9217914110661877, "compression/movement_sparsity/importance_threshold": -0.0005563053705796061, "compression/movement_sparsity/linear_layer_sparsity": 0.8782750799586985, "compression/movement_sparsity/model_sparsity": 0.8481036290719415, "compression_loss": 98.11058044433594, "distillation_loss": 4.775003433227539, "epoch": 3.72, "learning_rate": 3.489715412792336e-05, "loss": 102.7357, "step": 4399, "task_loss": 2.8160622119903564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9219457700787095, "compression/movement_sparsity/importance_threshold": -0.0005552073997705954, "compression/movement_sparsity/linear_layer_sparsity": 0.8784558980367287, "compression/movement_sparsity/model_sparsity": 0.8482782354927298, "compression_loss": 98.12641143798828, "distillation_loss": 6.610072612762451, "epoch": 3.72, "learning_rate": 3.489245796938105e-05, "loss": 102.8758, "step": 4400, "task_loss": 2.4263997077941895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9220999258535879, "compression/movement_sparsity/importance_threshold": -0.0005541108746108933, "compression/movement_sparsity/linear_layer_sparsity": 0.8785140522022888, "compression/movement_sparsity/model_sparsity": 0.8483343918837998, "compression_loss": 98.14226531982422, "distillation_loss": 4.303683757781982, "epoch": 3.72, "learning_rate": 3.4887761810838736e-05, "loss": 102.2369, "step": 4401, "task_loss": 4.6938042640686035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9222538785247081, "compression/movement_sparsity/importance_threshold": -0.0005530157941481617, "compression/movement_sparsity/linear_layer_sparsity": 0.8785357064907156, "compression/movement_sparsity/model_sparsity": 0.8483553022808027, "compression_loss": 98.15808868408203, "distillation_loss": 4.618495941162109, "epoch": 3.72, "learning_rate": 3.488306565229642e-05, "loss": 102.1673, "step": 4402, "task_loss": 1.5952517986297607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.922407628225955, "compression/movement_sparsity/importance_threshold": -0.0005519221574300627, "compression/movement_sparsity/linear_layer_sparsity": 0.8786973743555226, "compression/movement_sparsity/model_sparsity": 0.8485114163571051, "compression_loss": 98.17382049560547, "distillation_loss": 3.740405797958374, "epoch": 3.72, "learning_rate": 3.487836949375411e-05, "loss": 102.1526, "step": 4403, "task_loss": 2.116986036300659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9225611750912138, "compression/movement_sparsity/importance_threshold": -0.00055082996350426, "compression/movement_sparsity/linear_layer_sparsity": 0.8788413586797257, "compression/movement_sparsity/model_sparsity": 0.8486504543768242, "compression_loss": 98.18962097167969, "distillation_loss": 5.396318435668945, "epoch": 3.72, "learning_rate": 3.4873673335211795e-05, "loss": 102.4187, "step": 4404, "task_loss": 2.9794914722442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9227145192543698, "compression/movement_sparsity/importance_threshold": -0.0005497392114184136, "compression/movement_sparsity/linear_layer_sparsity": 0.8789578220250251, "compression/movement_sparsity/model_sparsity": 0.8487629168479295, "compression_loss": 98.20539093017578, "distillation_loss": 4.1959991455078125, "epoch": 3.72, "learning_rate": 3.486897717666949e-05, "loss": 102.2367, "step": 4405, "task_loss": 1.4901236295700073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.922867660849308, "compression/movement_sparsity/importance_threshold": -0.000548649900220184, "compression/movement_sparsity/linear_layer_sparsity": 0.87903592532304, "compression/movement_sparsity/model_sparsity": 0.8488383370573837, "compression_loss": 98.22109985351562, "distillation_loss": 5.575104236602783, "epoch": 3.72, "learning_rate": 3.4864281018127175e-05, "loss": 102.6432, "step": 4406, "task_loss": 2.6789731979370117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9230206000099137, "compression/movement_sparsity/importance_threshold": -0.0005475620289572356, "compression/movement_sparsity/linear_layer_sparsity": 0.8791290292239409, "compression/movement_sparsity/model_sparsity": 0.8489282425528675, "compression_loss": 98.23692321777344, "distillation_loss": 3.514693021774292, "epoch": 3.72, "learning_rate": 3.485958485958486e-05, "loss": 102.5555, "step": 4407, "task_loss": 1.1206004619598389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9231733368700717, "compression/movement_sparsity/importance_threshold": -0.0005464755966772296, "compression/movement_sparsity/linear_layer_sparsity": 0.8791905698531095, "compression/movement_sparsity/model_sparsity": 0.8489876690721031, "compression_loss": 98.25261688232422, "distillation_loss": 3.313815116882324, "epoch": 3.73, "learning_rate": 3.485488870104255e-05, "loss": 102.8852, "step": 4408, "task_loss": 1.9999123811721802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9233258715636676, "compression/movement_sparsity/importance_threshold": -0.000545390602427827, "compression/movement_sparsity/linear_layer_sparsity": 0.8793053161182695, "compression/movement_sparsity/model_sparsity": 0.849098473450054, "compression_loss": 98.26836395263672, "distillation_loss": 6.518400192260742, "epoch": 3.73, "learning_rate": 3.485019254250024e-05, "loss": 103.5665, "step": 4409, "task_loss": 3.380814552307129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9234782042245862, "compression/movement_sparsity/importance_threshold": -0.0005443070452566907, "compression/movement_sparsity/linear_layer_sparsity": 0.8793828112837349, "compression/movement_sparsity/model_sparsity": 0.8491733064181827, "compression_loss": 98.28416442871094, "distillation_loss": 6.030604362487793, "epoch": 3.73, "learning_rate": 3.484549638395793e-05, "loss": 103.5585, "step": 4410, "task_loss": 2.9665775299072266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9236303349867128, "compression/movement_sparsity/importance_threshold": -0.0005432249242114818, "compression/movement_sparsity/linear_layer_sparsity": 0.8795049982294996, "compression/movement_sparsity/model_sparsity": 0.8492912958664693, "compression_loss": 98.29985046386719, "distillation_loss": 5.661697864532471, "epoch": 3.73, "learning_rate": 3.4840800225415607e-05, "loss": 103.2079, "step": 4411, "task_loss": 3.183577060699463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9237822639839324, "compression/movement_sparsity/importance_threshold": -0.0005421442383398622, "compression/movement_sparsity/linear_layer_sparsity": 0.8795335208384846, "compression/movement_sparsity/model_sparsity": 0.8493188386360899, "compression_loss": 98.31549072265625, "distillation_loss": 3.880539894104004, "epoch": 3.73, "learning_rate": 3.48361040668733e-05, "loss": 103.0181, "step": 4412, "task_loss": 2.789670705795288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9239339913501302, "compression/movement_sparsity/importance_threshold": -0.0005410649866894948, "compression/movement_sparsity/linear_layer_sparsity": 0.8795490580289141, "compression/movement_sparsity/model_sparsity": 0.8493338420762302, "compression_loss": 98.33114624023438, "distillation_loss": 3.11503529548645, "epoch": 3.73, "learning_rate": 3.4831407908330986e-05, "loss": 102.8959, "step": 4413, "task_loss": 2.650188684463501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9240855172191914, "compression/movement_sparsity/importance_threshold": -0.0005399871683080398, "compression/movement_sparsity/linear_layer_sparsity": 0.8796025140724256, "compression/movement_sparsity/model_sparsity": 0.8493854617401971, "compression_loss": 98.34679412841797, "distillation_loss": 4.839973449707031, "epoch": 3.73, "learning_rate": 3.482671174978868e-05, "loss": 102.2996, "step": 4414, "task_loss": 2.389908790588379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9242368417250012, "compression/movement_sparsity/importance_threshold": -0.0005389107822431601, "compression/movement_sparsity/linear_layer_sparsity": 0.879636426405182, "compression/movement_sparsity/model_sparsity": 0.8494182090799968, "compression_loss": 98.3624038696289, "distillation_loss": 4.3331403732299805, "epoch": 3.73, "learning_rate": 3.482201559124636e-05, "loss": 102.438, "step": 4415, "task_loss": 2.897165298461914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9243879650014446, "compression/movement_sparsity/importance_threshold": -0.0005378358275425167, "compression/movement_sparsity/linear_layer_sparsity": 0.8797414544737188, "compression/movement_sparsity/model_sparsity": 0.8495196291112751, "compression_loss": 98.37788391113281, "distillation_loss": 4.090211391448975, "epoch": 3.73, "learning_rate": 3.481731943270405e-05, "loss": 102.3404, "step": 4416, "task_loss": 1.874940037727356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9245388871824067, "compression/movement_sparsity/importance_threshold": -0.0005367623032537733, "compression/movement_sparsity/linear_layer_sparsity": 0.8798722745168518, "compression/movement_sparsity/model_sparsity": 0.849645955083477, "compression_loss": 98.39344024658203, "distillation_loss": 3.1934242248535156, "epoch": 3.73, "learning_rate": 3.481262327416174e-05, "loss": 102.7655, "step": 4417, "task_loss": 2.480137348175049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9246896084017728, "compression/movement_sparsity/importance_threshold": -0.0005356902084245902, "compression/movement_sparsity/linear_layer_sparsity": 0.8799597621347961, "compression/movement_sparsity/model_sparsity": 0.8497304372326016, "compression_loss": 98.4089126586914, "distillation_loss": 4.283855438232422, "epoch": 3.73, "learning_rate": 3.4807927115619424e-05, "loss": 103.2355, "step": 4418, "task_loss": 1.424437403678894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.924840128793428, "compression/movement_sparsity/importance_threshold": -0.0005346195421026285, "compression/movement_sparsity/linear_layer_sparsity": 0.8801141443331777, "compression/movement_sparsity/model_sparsity": 0.8498795159275334, "compression_loss": 98.42438507080078, "distillation_loss": 3.815579414367676, "epoch": 3.73, "learning_rate": 3.480323095707712e-05, "loss": 102.2791, "step": 4419, "task_loss": 1.3268206119537354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9249904484912574, "compression/movement_sparsity/importance_threshold": -0.0005335503033355518, "compression/movement_sparsity/linear_layer_sparsity": 0.8802320385785934, "compression/movement_sparsity/model_sparsity": 0.849993360142934, "compression_loss": 98.43980407714844, "distillation_loss": 4.688684463500977, "epoch": 3.74, "learning_rate": 3.47985347985348e-05, "loss": 103.3298, "step": 4420, "task_loss": 3.036497116088867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9251405676291462, "compression/movement_sparsity/importance_threshold": -0.0005324824911710213, "compression/movement_sparsity/linear_layer_sparsity": 0.8802752398379382, "compression/movement_sparsity/model_sparsity": 0.8500350773061177, "compression_loss": 98.45523834228516, "distillation_loss": 5.036919593811035, "epoch": 3.74, "learning_rate": 3.479383863999249e-05, "loss": 103.1663, "step": 4421, "task_loss": 2.0363194942474365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9252904863409794, "compression/movement_sparsity/importance_threshold": -0.0005314161046566989, "compression/movement_sparsity/linear_layer_sparsity": 0.8802708755925834, "compression/movement_sparsity/model_sparsity": 0.8500308629860169, "compression_loss": 98.47061157226562, "distillation_loss": 3.8046655654907227, "epoch": 3.74, "learning_rate": 3.4789142481450177e-05, "loss": 102.6319, "step": 4422, "task_loss": 2.7111430168151855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9254402047606423, "compression/movement_sparsity/importance_threshold": -0.0005303511428402467, "compression/movement_sparsity/linear_layer_sparsity": 0.8803607361198873, "compression/movement_sparsity/model_sparsity": 0.8501176365277645, "compression_loss": 98.48602294921875, "distillation_loss": 4.15582275390625, "epoch": 3.74, "learning_rate": 3.478444632290786e-05, "loss": 102.8358, "step": 4423, "task_loss": 2.8964500427246094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9255897230220199, "compression/movement_sparsity/importance_threshold": -0.0005292876047693256, "compression/movement_sparsity/linear_layer_sparsity": 0.8804661934584589, "compression/movement_sparsity/model_sparsity": 0.8502194710823314, "compression_loss": 98.50135803222656, "distillation_loss": 4.502528190612793, "epoch": 3.74, "learning_rate": 3.477975016436555e-05, "loss": 102.8232, "step": 4424, "task_loss": 3.3766138553619385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9257390412589975, "compression/movement_sparsity/importance_threshold": -0.0005282254894915985, "compression/movement_sparsity/linear_layer_sparsity": 0.8805087985094218, "compression/movement_sparsity/model_sparsity": 0.8502606125187254, "compression_loss": 98.51667022705078, "distillation_loss": 5.480388164520264, "epoch": 3.74, "learning_rate": 3.4775054005823236e-05, "loss": 103.5289, "step": 4425, "task_loss": 3.5890626907348633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9258881596054601, "compression/movement_sparsity/importance_threshold": -0.0005271647960547266, "compression/movement_sparsity/linear_layer_sparsity": 0.8805295942577788, "compression/movement_sparsity/model_sparsity": 0.8502806938691511, "compression_loss": 98.53204345703125, "distillation_loss": 3.571627616882324, "epoch": 3.74, "learning_rate": 3.477035784728093e-05, "loss": 102.2497, "step": 4426, "task_loss": 2.033351421356201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9260370781952929, "compression/movement_sparsity/importance_threshold": -0.0005261055235063717, "compression/movement_sparsity/linear_layer_sparsity": 0.880639022344173, "compression/movement_sparsity/model_sparsity": 0.8503863627641376, "compression_loss": 98.5472640991211, "distillation_loss": 5.225177764892578, "epoch": 3.74, "learning_rate": 3.4765661688738615e-05, "loss": 103.0419, "step": 4427, "task_loss": 2.969907760620117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.926185797162381, "compression/movement_sparsity/importance_threshold": -0.0005250476708941968, "compression/movement_sparsity/linear_layer_sparsity": 0.8807440504127098, "compression/movement_sparsity/model_sparsity": 0.8504877827954158, "compression_loss": 98.56250762939453, "distillation_loss": 4.071252346038818, "epoch": 3.74, "learning_rate": 3.47609655301963e-05, "loss": 102.822, "step": 4428, "task_loss": 1.9235905408859253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9263343166406097, "compression/movement_sparsity/importance_threshold": -0.000523991237265862, "compression/movement_sparsity/linear_layer_sparsity": 0.88087089970802, "compression/movement_sparsity/model_sparsity": 0.8506102744271982, "compression_loss": 98.57775115966797, "distillation_loss": 3.525042772293091, "epoch": 3.74, "learning_rate": 3.475626937165399e-05, "loss": 102.2736, "step": 4429, "task_loss": 2.2840027809143066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9264826367638639, "compression/movement_sparsity/importance_threshold": -0.0005229362216690311, "compression/movement_sparsity/linear_layer_sparsity": 0.8809181790326963, "compression/movement_sparsity/model_sparsity": 0.8506559295616235, "compression_loss": 98.59297943115234, "distillation_loss": 3.4099533557891846, "epoch": 3.74, "learning_rate": 3.4751573213111674e-05, "loss": 102.4356, "step": 4430, "task_loss": 2.4610323905944824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9266307576660289, "compression/movement_sparsity/importance_threshold": -0.0005218826231513643, "compression/movement_sparsity/linear_layer_sparsity": 0.8809971527949486, "compression/movement_sparsity/model_sparsity": 0.8507321903321908, "compression_loss": 98.60816192626953, "distillation_loss": 4.982372283935547, "epoch": 3.75, "learning_rate": 3.474687705456937e-05, "loss": 103.133, "step": 4431, "task_loss": 3.1631925106048584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9267786794809898, "compression/movement_sparsity/importance_threshold": -0.0005208304407605235, "compression/movement_sparsity/linear_layer_sparsity": 0.8810658002280283, "compression/movement_sparsity/model_sparsity": 0.85079847951476, "compression_loss": 98.62330627441406, "distillation_loss": 4.068840503692627, "epoch": 3.75, "learning_rate": 3.474218089602705e-05, "loss": 103.2773, "step": 4432, "task_loss": 2.6023313999176025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9269264023426318, "compression/movement_sparsity/importance_threshold": -0.0005197796735441707, "compression/movement_sparsity/linear_layer_sparsity": 0.8811924229641535, "compression/movement_sparsity/model_sparsity": 0.8509207523703622, "compression_loss": 98.63846588134766, "distillation_loss": 4.583135604858398, "epoch": 3.75, "learning_rate": 3.473748473748474e-05, "loss": 104.3271, "step": 4433, "task_loss": 2.739173173904419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9270739263848399, "compression/movement_sparsity/importance_threshold": -0.0005187303205499696, "compression/movement_sparsity/linear_layer_sparsity": 0.8813300517070066, "compression/movement_sparsity/model_sparsity": 0.8510536531425028, "compression_loss": 98.65357208251953, "distillation_loss": 3.8744595050811768, "epoch": 3.75, "learning_rate": 3.4732788578942426e-05, "loss": 102.4334, "step": 4434, "task_loss": 2.0749192237854004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9272212517414994, "compression/movement_sparsity/importance_threshold": -0.0005176823808255796, "compression/movement_sparsity/linear_layer_sparsity": 0.881422022811982, "compression/movement_sparsity/model_sparsity": 0.8511424647570861, "compression_loss": 98.66866302490234, "distillation_loss": 4.800232887268066, "epoch": 3.75, "learning_rate": 3.472809242040011e-05, "loss": 102.9516, "step": 4435, "task_loss": 2.6163361072540283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9273683785464952, "compression/movement_sparsity/importance_threshold": -0.0005166358534186644, "compression/movement_sparsity/linear_layer_sparsity": 0.8814942117228495, "compression/movement_sparsity/model_sparsity": 0.8512121737567863, "compression_loss": 98.68379974365234, "distillation_loss": 4.333188533782959, "epoch": 3.75, "learning_rate": 3.4723396261857806e-05, "loss": 102.7973, "step": 4436, "task_loss": 2.1380553245544434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9275153069337128, "compression/movement_sparsity/importance_threshold": -0.0005155907373768842, "compression/movement_sparsity/linear_layer_sparsity": 0.8816312323331532, "compression/movement_sparsity/model_sparsity": 0.8513444872876013, "compression_loss": 98.69889831542969, "distillation_loss": 4.016592025756836, "epoch": 3.75, "learning_rate": 3.4718700103315485e-05, "loss": 102.7446, "step": 4437, "task_loss": 1.9854427576065063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9276620370370371, "compression/movement_sparsity/importance_threshold": -0.0005145470317479009, "compression/movement_sparsity/linear_layer_sparsity": 0.881659432989612, "compression/movement_sparsity/model_sparsity": 0.8513717191647554, "compression_loss": 98.71393585205078, "distillation_loss": 6.524956703186035, "epoch": 3.75, "learning_rate": 3.471400394477318e-05, "loss": 103.4606, "step": 4438, "task_loss": 3.5826659202575684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9278085689903532, "compression/movement_sparsity/importance_threshold": -0.0005135047355793784, "compression/movement_sparsity/linear_layer_sparsity": 0.881760490310326, "compression/movement_sparsity/model_sparsity": 0.8514693048556141, "compression_loss": 98.7289810180664, "distillation_loss": 3.9144983291625977, "epoch": 3.75, "learning_rate": 3.4709307786230865e-05, "loss": 102.5583, "step": 4439, "task_loss": 1.9184365272521973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9279549029275463, "compression/movement_sparsity/importance_threshold": -0.0005124638479189775, "compression/movement_sparsity/linear_layer_sparsity": 0.8817881543792412, "compression/movement_sparsity/model_sparsity": 0.8514960185786574, "compression_loss": 98.74407196044922, "distillation_loss": 3.582336902618408, "epoch": 3.75, "learning_rate": 3.470461162768856e-05, "loss": 102.9002, "step": 4440, "task_loss": 1.9493331909179688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9281010389825016, "compression/movement_sparsity/importance_threshold": -0.0005114243678143596, "compression/movement_sparsity/linear_layer_sparsity": 0.881884895151271, "compression/movement_sparsity/model_sparsity": 0.8515894360075585, "compression_loss": 98.75904083251953, "distillation_loss": 6.385575771331787, "epoch": 3.75, "learning_rate": 3.469991546914624e-05, "loss": 103.5047, "step": 4441, "task_loss": 2.8784375190734863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9282469772891043, "compression/movement_sparsity/importance_threshold": -0.0005103862943131856, "compression/movement_sparsity/linear_layer_sparsity": 0.8819799307673287, "compression/movement_sparsity/model_sparsity": 0.8516812068578411, "compression_loss": 98.77405548095703, "distillation_loss": 3.969663143157959, "epoch": 3.75, "learning_rate": 3.4695219310603924e-05, "loss": 102.7429, "step": 4442, "task_loss": 1.9445146322250366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9283927179812393, "compression/movement_sparsity/importance_threshold": -0.0005093496264631202, "compression/movement_sparsity/linear_layer_sparsity": 0.8821508398620536, "compression/movement_sparsity/model_sparsity": 0.8518462446993842, "compression_loss": 98.78902435302734, "distillation_loss": 5.244895935058594, "epoch": 3.76, "learning_rate": 3.469052315206162e-05, "loss": 103.3681, "step": 4443, "task_loss": 2.351222038269043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.928538261192792, "compression/movement_sparsity/importance_threshold": -0.0005083143633118218, "compression/movement_sparsity/linear_layer_sparsity": 0.8822855352596684, "compression/movement_sparsity/model_sparsity": 0.8519763128957193, "compression_loss": 98.80393981933594, "distillation_loss": 4.2960734367370605, "epoch": 3.76, "learning_rate": 3.46858269935193e-05, "loss": 103.1267, "step": 4444, "task_loss": 2.738481044769287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9286836070576474, "compression/movement_sparsity/importance_threshold": -0.0005072805039069549, "compression/movement_sparsity/linear_layer_sparsity": 0.8823310378833668, "compression/movement_sparsity/model_sparsity": 0.8520202523643113, "compression_loss": 98.81888580322266, "distillation_loss": 4.025806903839111, "epoch": 3.76, "learning_rate": 3.468113083497699e-05, "loss": 102.6742, "step": 4445, "task_loss": 2.5665955543518066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9288287557096907, "compression/movement_sparsity/importance_threshold": -0.0005062480472961808, "compression/movement_sparsity/linear_layer_sparsity": 0.8823036122978043, "compression/movement_sparsity/model_sparsity": 0.8519937689319839, "compression_loss": 98.83377075195312, "distillation_loss": 7.295654296875, "epoch": 3.76, "learning_rate": 3.4676434676434676e-05, "loss": 103.8169, "step": 4446, "task_loss": 3.421022891998291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.928973707282807, "compression/movement_sparsity/importance_threshold": -0.0005052169925271605, "compression/movement_sparsity/linear_layer_sparsity": 0.8824455933618445, "compression/movement_sparsity/model_sparsity": 0.8521308725096894, "compression_loss": 98.8486328125, "distillation_loss": 5.0106048583984375, "epoch": 3.76, "learning_rate": 3.467173851789237e-05, "loss": 103.3263, "step": 4447, "task_loss": 4.022635459899902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9291184619108814, "compression/movement_sparsity/importance_threshold": -0.0005041873386475559, "compression/movement_sparsity/linear_layer_sparsity": 0.8824914298622367, "compression/movement_sparsity/model_sparsity": 0.8521751343852837, "compression_loss": 98.86345672607422, "distillation_loss": 3.8422203063964844, "epoch": 3.76, "learning_rate": 3.4667042359350055e-05, "loss": 103.064, "step": 4448, "task_loss": 1.878659725189209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9292630197277991, "compression/movement_sparsity/importance_threshold": -0.0005031590847050307, "compression/movement_sparsity/linear_layer_sparsity": 0.8825599461294724, "compression/movement_sparsity/model_sparsity": 0.8522412969079591, "compression_loss": 98.87828826904297, "distillation_loss": 4.289934158325195, "epoch": 3.76, "learning_rate": 3.4662346200807735e-05, "loss": 102.2394, "step": 4449, "task_loss": 1.0732502937316895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9294073808674452, "compression/movement_sparsity/importance_threshold": -0.0005021322297472444, "compression/movement_sparsity/linear_layer_sparsity": 0.8826647595629916, "compression/movement_sparsity/model_sparsity": 0.8523425096775931, "compression_loss": 98.89307403564453, "distillation_loss": 3.4446499347686768, "epoch": 3.76, "learning_rate": 3.465765004226543e-05, "loss": 102.6104, "step": 4450, "task_loss": 2.573899984359741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.929551545463705, "compression/movement_sparsity/importance_threshold": -0.0005011067728218598, "compression/movement_sparsity/linear_layer_sparsity": 0.8826997212225, "compression/movement_sparsity/model_sparsity": 0.8523762702965427, "compression_loss": 98.90782928466797, "distillation_loss": 6.528578758239746, "epoch": 3.76, "learning_rate": 3.4652953883723114e-05, "loss": 103.3232, "step": 4451, "task_loss": 3.0551669597625732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9296955136504633, "compression/movement_sparsity/importance_threshold": -0.0005000827129765396, "compression/movement_sparsity/linear_layer_sparsity": 0.882745009211181, "compression/movement_sparsity/model_sparsity": 0.8524200025034904, "compression_loss": 98.92261505126953, "distillation_loss": 5.065492630004883, "epoch": 3.76, "learning_rate": 3.464825772518081e-05, "loss": 103.6661, "step": 4452, "task_loss": 2.5728533267974854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9298392855616057, "compression/movement_sparsity/importance_threshold": -0.0004990600492589441, "compression/movement_sparsity/linear_layer_sparsity": 0.8828793111112637, "compression/movement_sparsity/model_sparsity": 0.8525496907201443, "compression_loss": 98.93736267089844, "distillation_loss": 4.576075077056885, "epoch": 3.76, "learning_rate": 3.4643561566638494e-05, "loss": 102.638, "step": 4453, "task_loss": 2.252444267272949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9299828613310169, "compression/movement_sparsity/importance_threshold": -0.0004980387807167353, "compression/movement_sparsity/linear_layer_sparsity": 0.8829896931310689, "compression/movement_sparsity/model_sparsity": 0.8526562807779944, "compression_loss": 98.95201873779297, "distillation_loss": 5.467743873596191, "epoch": 3.76, "learning_rate": 3.463886540809618e-05, "loss": 103.9185, "step": 4454, "task_loss": 3.238067865371704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9301262410925824, "compression/movement_sparsity/importance_threshold": -0.0004970189063975769, "compression/movement_sparsity/linear_layer_sparsity": 0.8830120270970508, "compression/movement_sparsity/model_sparsity": 0.8526778475035375, "compression_loss": 98.96672058105469, "distillation_loss": 4.18864107131958, "epoch": 3.77, "learning_rate": 3.4634169249553866e-05, "loss": 102.9691, "step": 4455, "task_loss": 1.5946985483169556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.930269424980187, "compression/movement_sparsity/importance_threshold": -0.0004960004253491282, "compression/movement_sparsity/linear_layer_sparsity": 0.883027218486619, "compression/movement_sparsity/model_sparsity": 0.8526925170221398, "compression_loss": 98.98135375976562, "distillation_loss": 3.5828604698181152, "epoch": 3.77, "learning_rate": 3.462947309101155e-05, "loss": 102.8171, "step": 4456, "task_loss": 2.27610182762146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.930412413127716, "compression/movement_sparsity/importance_threshold": -0.0004949833366190539, "compression/movement_sparsity/linear_layer_sparsity": 0.883176866790449, "compression/movement_sparsity/model_sparsity": 0.8528370244463612, "compression_loss": 98.99601745605469, "distillation_loss": 5.503192901611328, "epoch": 3.77, "learning_rate": 3.4624776932469246e-05, "loss": 103.2856, "step": 4457, "task_loss": 3.1406311988830566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9305552056690547, "compression/movement_sparsity/importance_threshold": -0.0004939676392550132, "compression/movement_sparsity/linear_layer_sparsity": 0.8832998168829425, "compression/movement_sparsity/model_sparsity": 0.8529557508249388, "compression_loss": 99.01061248779297, "distillation_loss": 5.239018440246582, "epoch": 3.77, "learning_rate": 3.4620080773926925e-05, "loss": 103.3327, "step": 4458, "task_loss": 1.674445629119873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9306978027380881, "compression/movement_sparsity/importance_threshold": -0.000492953332304669, "compression/movement_sparsity/linear_layer_sparsity": 0.8833258711892268, "compression/movement_sparsity/model_sparsity": 0.8529809100856498, "compression_loss": 99.02525329589844, "distillation_loss": 4.680885314941406, "epoch": 3.77, "learning_rate": 3.461538461538462e-05, "loss": 103.1107, "step": 4459, "task_loss": 2.6622536182403564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9308402044687013, "compression/movement_sparsity/importance_threshold": -0.0004919404148156833, "compression/movement_sparsity/linear_layer_sparsity": 0.8834315670111511, "compression/movement_sparsity/model_sparsity": 0.8530829749309327, "compression_loss": 99.03984832763672, "distillation_loss": 3.092892646789551, "epoch": 3.77, "learning_rate": 3.4610688456842305e-05, "loss": 103.0968, "step": 4460, "task_loss": 2.4200284481048584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9309824109947794, "compression/movement_sparsity/importance_threshold": -0.000490928885835718, "compression/movement_sparsity/linear_layer_sparsity": 0.8835555306578935, "compression/movement_sparsity/model_sparsity": 0.8532026800450527, "compression_loss": 99.05441284179688, "distillation_loss": 4.4522504806518555, "epoch": 3.77, "learning_rate": 3.460599229829999e-05, "loss": 103.8007, "step": 4461, "task_loss": 2.2847254276275635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9311244224502077, "compression/movement_sparsity/importance_threshold": -0.0004899187444124351, "compression/movement_sparsity/linear_layer_sparsity": 0.883643602560052, "compression/movement_sparsity/model_sparsity": 0.8532877264064311, "compression_loss": 99.06893157958984, "distillation_loss": 4.708257675170898, "epoch": 3.77, "learning_rate": 3.460129613975768e-05, "loss": 103.6891, "step": 4462, "task_loss": 2.5645902156829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9312662389688712, "compression/movement_sparsity/importance_threshold": -0.0004889099895934957, "compression/movement_sparsity/linear_layer_sparsity": 0.883769414452778, "compression/movement_sparsity/model_sparsity": 0.8534092162735993, "compression_loss": 99.08350372314453, "distillation_loss": 4.818932056427002, "epoch": 3.77, "learning_rate": 3.4596599981215364e-05, "loss": 103.5926, "step": 4463, "task_loss": 2.4528422355651855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9314078606846552, "compression/movement_sparsity/importance_threshold": -0.0004879026204265618, "compression/movement_sparsity/linear_layer_sparsity": 0.8839289478915782, "compression/movement_sparsity/model_sparsity": 0.8535632692479945, "compression_loss": 99.09800720214844, "distillation_loss": 5.888701438903809, "epoch": 3.77, "learning_rate": 3.459190382267306e-05, "loss": 103.3749, "step": 4464, "task_loss": 3.550035238265991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9315492877314446, "compression/movement_sparsity/importance_threshold": -0.0004868966359592962, "compression/movement_sparsity/linear_layer_sparsity": 0.8840463174736181, "compression/movement_sparsity/model_sparsity": 0.8536766068238202, "compression_loss": 99.11248779296875, "distillation_loss": 4.209849834442139, "epoch": 3.77, "learning_rate": 3.4587207664130743e-05, "loss": 103.6615, "step": 4465, "task_loss": 2.927854299545288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9316905202431248, "compression/movement_sparsity/importance_threshold": -0.00048589203523935996, "compression/movement_sparsity/linear_layer_sparsity": 0.8840941095375026, "compression/movement_sparsity/model_sparsity": 0.8537227570832847, "compression_loss": 99.1269302368164, "distillation_loss": 3.6799213886260986, "epoch": 3.77, "learning_rate": 3.458251150558843e-05, "loss": 103.1955, "step": 4466, "task_loss": 1.352614402770996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9318315583535808, "compression/movement_sparsity/importance_threshold": -0.0004848888173144151, "compression/movement_sparsity/linear_layer_sparsity": 0.8840565245611143, "compression/movement_sparsity/model_sparsity": 0.8536864632664604, "compression_loss": 99.14134216308594, "distillation_loss": 3.1348071098327637, "epoch": 3.78, "learning_rate": 3.4577815347046116e-05, "loss": 103.313, "step": 4467, "task_loss": 1.8499433994293213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9319724021966976, "compression/movement_sparsity/importance_threshold": -0.00048388698123212354, "compression/movement_sparsity/linear_layer_sparsity": 0.8841613856913041, "compression/movement_sparsity/model_sparsity": 0.8537877220942375, "compression_loss": 99.15569305419922, "distillation_loss": 3.4927563667297363, "epoch": 3.78, "learning_rate": 3.45731191885038e-05, "loss": 104.1298, "step": 4468, "task_loss": 2.132392406463623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9321130519063607, "compression/movement_sparsity/importance_threshold": -0.0004828865260401464, "compression/movement_sparsity/linear_layer_sparsity": 0.8842608332493872, "compression/movement_sparsity/model_sparsity": 0.853883753322764, "compression_loss": 99.17005920410156, "distillation_loss": 5.578777313232422, "epoch": 3.78, "learning_rate": 3.4568423029961496e-05, "loss": 103.9253, "step": 4469, "task_loss": 3.6059207916259766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.932253507616455, "compression/movement_sparsity/importance_threshold": -0.00048188745078614655, "compression/movement_sparsity/linear_layer_sparsity": 0.8843519577384604, "compression/movement_sparsity/model_sparsity": 0.8539717474053059, "compression_loss": 99.1844711303711, "distillation_loss": 5.191614151000977, "epoch": 3.78, "learning_rate": 3.456372687141918e-05, "loss": 103.345, "step": 4470, "task_loss": 2.9228034019470215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9323937694608657, "compression/movement_sparsity/importance_threshold": -0.000480889754517785, "compression/movement_sparsity/linear_layer_sparsity": 0.8844060219145214, "compression/movement_sparsity/model_sparsity": 0.8540239543105983, "compression_loss": 99.1987533569336, "distillation_loss": 4.202188491821289, "epoch": 3.78, "learning_rate": 3.455903071287687e-05, "loss": 103.2854, "step": 4471, "task_loss": 2.310105562210083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9325338375734779, "compression/movement_sparsity/importance_threshold": -0.0004798934362827247, "compression/movement_sparsity/linear_layer_sparsity": 0.8845095236896008, "compression/movement_sparsity/model_sparsity": 0.8541239004812949, "compression_loss": 99.21300506591797, "distillation_loss": 5.550764083862305, "epoch": 3.78, "learning_rate": 3.4554334554334555e-05, "loss": 103.3724, "step": 4472, "task_loss": 3.1617605686187744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9326737120881767, "compression/movement_sparsity/importance_threshold": -0.00047889849512862664, "compression/movement_sparsity/linear_layer_sparsity": 0.8846214558511986, "compression/movement_sparsity/model_sparsity": 0.8542319874287982, "compression_loss": 99.22725677490234, "distillation_loss": 4.065619945526123, "epoch": 3.78, "learning_rate": 3.454963839579224e-05, "loss": 103.5559, "step": 4473, "task_loss": 1.8509092330932617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9328133931388474, "compression/movement_sparsity/importance_threshold": -0.00047790493010315283, "compression/movement_sparsity/linear_layer_sparsity": 0.8847072502373386, "compression/movement_sparsity/model_sparsity": 0.85431483451384, "compression_loss": 99.24152374267578, "distillation_loss": 4.019781112670898, "epoch": 3.78, "learning_rate": 3.4544942237249934e-05, "loss": 103.439, "step": 4474, "task_loss": 3.2693681716918945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.932952880859375, "compression/movement_sparsity/importance_threshold": -0.00047691274025396524, "compression/movement_sparsity/linear_layer_sparsity": 0.884819623593139, "compression/movement_sparsity/model_sparsity": 0.8544233474991677, "compression_loss": 99.2557373046875, "distillation_loss": 4.051258087158203, "epoch": 3.78, "learning_rate": 3.4540246078707614e-05, "loss": 103.6556, "step": 4475, "task_loss": 1.4835460186004639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9330921753836448, "compression/movement_sparsity/importance_threshold": -0.00047592192462872496, "compression/movement_sparsity/linear_layer_sparsity": 0.8848656270318782, "compression/movement_sparsity/model_sparsity": 0.8544677705782631, "compression_loss": 99.26995086669922, "distillation_loss": 6.238205909729004, "epoch": 3.78, "learning_rate": 3.453554992016531e-05, "loss": 103.7392, "step": 4476, "task_loss": 3.370476245880127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9332312768455415, "compression/movement_sparsity/importance_threshold": -0.00047493248227509655, "compression/movement_sparsity/linear_layer_sparsity": 0.8849376728527341, "compression/movement_sparsity/model_sparsity": 0.8545373414035337, "compression_loss": 99.28418731689453, "distillation_loss": 4.273415565490723, "epoch": 3.78, "learning_rate": 3.453085376162299e-05, "loss": 103.4085, "step": 4477, "task_loss": 2.771115779876709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9333701853789509, "compression/movement_sparsity/importance_threshold": -0.00047394441224073765, "compression/movement_sparsity/linear_layer_sparsity": 0.8850217501587345, "compression/movement_sparsity/model_sparsity": 0.854618530395421, "compression_loss": 99.2983169555664, "distillation_loss": 4.486677169799805, "epoch": 3.78, "learning_rate": 3.4526157603080686e-05, "loss": 103.0415, "step": 4478, "task_loss": 3.070375919342041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9335089011177578, "compression/movement_sparsity/importance_threshold": -0.00047295771357331283, "compression/movement_sparsity/linear_layer_sparsity": 0.8851887123539719, "compression/movement_sparsity/model_sparsity": 0.8547797569256161, "compression_loss": 99.31242370605469, "distillation_loss": 3.580758571624756, "epoch": 3.79, "learning_rate": 3.4521461444538366e-05, "loss": 102.8995, "step": 4479, "task_loss": 2.23197078704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9336474241958472, "compression/movement_sparsity/importance_threshold": -0.00047197238532048404, "compression/movement_sparsity/linear_layer_sparsity": 0.8853751944116292, "compression/movement_sparsity/model_sparsity": 0.854959832750907, "compression_loss": 99.3265380859375, "distillation_loss": 6.157516002655029, "epoch": 3.79, "learning_rate": 3.451676528599606e-05, "loss": 104.0581, "step": 4480, "task_loss": 3.1862757205963135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9337857547471043, "compression/movement_sparsity/importance_threshold": -0.00047098842652991326, "compression/movement_sparsity/linear_layer_sparsity": 0.8853812995854587, "compression/movement_sparsity/model_sparsity": 0.8549657281932338, "compression_loss": 99.34066772460938, "distillation_loss": 4.558821678161621, "epoch": 3.79, "learning_rate": 3.4512069127453745e-05, "loss": 103.7763, "step": 4481, "task_loss": 2.1922099590301514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9339238929054146, "compression/movement_sparsity/importance_threshold": -0.00047000583624925985, "compression/movement_sparsity/linear_layer_sparsity": 0.8854773845282685, "compression/movement_sparsity/model_sparsity": 0.8550585123226662, "compression_loss": 99.35474395751953, "distillation_loss": 4.876386642456055, "epoch": 3.79, "learning_rate": 3.450737296891143e-05, "loss": 104.1648, "step": 4482, "task_loss": 2.428880214691162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9340618388046628, "compression/movement_sparsity/importance_threshold": -0.00046902461352618836, "compression/movement_sparsity/linear_layer_sparsity": 0.8855417154126639, "compression/movement_sparsity/model_sparsity": 0.8551206332432778, "compression_loss": 99.36882019042969, "distillation_loss": 4.912492752075195, "epoch": 3.79, "learning_rate": 3.4502676810369125e-05, "loss": 103.5714, "step": 4483, "task_loss": 2.373748540878296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9341995925787342, "compression/movement_sparsity/importance_threshold": -0.0004680447574083599, "compression/movement_sparsity/linear_layer_sparsity": 0.8855523159976922, "compression/movement_sparsity/model_sparsity": 0.8551308696655991, "compression_loss": 99.38285827636719, "distillation_loss": 3.2927117347717285, "epoch": 3.79, "learning_rate": 3.4497980651826804e-05, "loss": 103.2592, "step": 4484, "task_loss": 1.5120606422424316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.934337154361514, "compression/movement_sparsity/importance_threshold": -0.0004670662669434356, "compression/movement_sparsity/linear_layer_sparsity": 0.8855387582190902, "compression/movement_sparsity/model_sparsity": 0.8551177776384007, "compression_loss": 99.39688873291016, "distillation_loss": 4.0350341796875, "epoch": 3.79, "learning_rate": 3.44932844932845e-05, "loss": 103.4222, "step": 4485, "task_loss": 2.78372859954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9344745242868874, "compression/movement_sparsity/importance_threshold": -0.00046608914117907737, "compression/movement_sparsity/linear_layer_sparsity": 0.8856500583998034, "compression/movement_sparsity/model_sparsity": 0.855225254315507, "compression_loss": 99.4108657836914, "distillation_loss": 5.984676361083984, "epoch": 3.79, "learning_rate": 3.4488588334742184e-05, "loss": 103.9235, "step": 4486, "task_loss": 4.025545120239258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9346117024887393, "compression/movement_sparsity/importance_threshold": -0.00046511337916294807, "compression/movement_sparsity/linear_layer_sparsity": 0.8857105139297172, "compression/movement_sparsity/model_sparsity": 0.8552836330119853, "compression_loss": 99.42478942871094, "distillation_loss": 4.825526714324951, "epoch": 3.79, "learning_rate": 3.448389217619987e-05, "loss": 104.0551, "step": 4487, "task_loss": 3.6400630474090576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.934748689100955, "compression/movement_sparsity/importance_threshold": -0.00046413897994270967, "compression/movement_sparsity/linear_layer_sparsity": 0.8857416121589116, "compression/movement_sparsity/model_sparsity": 0.8553136629213375, "compression_loss": 99.43875885009766, "distillation_loss": 3.6228725910186768, "epoch": 3.79, "learning_rate": 3.4479196017657556e-05, "loss": 103.4938, "step": 4488, "task_loss": 3.1806387901306152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9348854842574196, "compression/movement_sparsity/importance_threshold": -0.0004631659425660224, "compression/movement_sparsity/linear_layer_sparsity": 0.885898486508329, "compression/movement_sparsity/model_sparsity": 0.8554651481542505, "compression_loss": 99.45264434814453, "distillation_loss": 4.375467300415039, "epoch": 3.79, "learning_rate": 3.447449985911524e-05, "loss": 103.5064, "step": 4489, "task_loss": 1.915402889251709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9350220880920183, "compression/movement_sparsity/importance_threshold": -0.00046219426608054906, "compression/movement_sparsity/linear_layer_sparsity": 0.8860787918471509, "compression/movement_sparsity/model_sparsity": 0.8556392594499996, "compression_loss": 99.46648406982422, "distillation_loss": 4.011748790740967, "epoch": 3.79, "learning_rate": 3.4469803700572936e-05, "loss": 104.0297, "step": 4490, "task_loss": 2.814131259918213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9351585007386362, "compression/movement_sparsity/importance_threshold": -0.00046122394953395165, "compression/movement_sparsity/linear_layer_sparsity": 0.8861213730497786, "compression/movement_sparsity/model_sparsity": 0.855680377857322, "compression_loss": 99.48041534423828, "distillation_loss": 4.525297164916992, "epoch": 3.8, "learning_rate": 3.446510754203062e-05, "loss": 103.6927, "step": 4491, "task_loss": 2.4270339012145996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9352947223311583, "compression/movement_sparsity/importance_threshold": -0.000460254991973893, "compression/movement_sparsity/linear_layer_sparsity": 0.886202111588841, "compression/movement_sparsity/model_sparsity": 0.8557583427791868, "compression_loss": 99.49427795410156, "distillation_loss": 4.287351131439209, "epoch": 3.8, "learning_rate": 3.446041138348831e-05, "loss": 103.6553, "step": 4492, "task_loss": 3.0528783798217773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.93543075300347, "compression/movement_sparsity/importance_threshold": -0.00045928739244803247, "compression/movement_sparsity/linear_layer_sparsity": 0.8862988046642002, "compression/movement_sparsity/model_sparsity": 0.8558517141499448, "compression_loss": 99.50819396972656, "distillation_loss": 5.037957191467285, "epoch": 3.8, "learning_rate": 3.4455715224945995e-05, "loss": 104.1974, "step": 4493, "task_loss": 2.025144100189209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9355665928894563, "compression/movement_sparsity/importance_threshold": -0.00045832115000403375, "compression/movement_sparsity/linear_layer_sparsity": 0.886308665950835, "compression/movement_sparsity/model_sparsity": 0.8558612366710469, "compression_loss": 99.52202606201172, "distillation_loss": 3.65324068069458, "epoch": 3.8, "learning_rate": 3.445101906640368e-05, "loss": 103.624, "step": 4494, "task_loss": 1.4790900945663452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9357022421230023, "compression/movement_sparsity/importance_threshold": -0.0004573562636895588, "compression/movement_sparsity/linear_layer_sparsity": 0.8864537234501253, "compression/movement_sparsity/model_sparsity": 0.8560013109989875, "compression_loss": 99.53585815429688, "distillation_loss": 5.0295257568359375, "epoch": 3.8, "learning_rate": 3.4446322907861374e-05, "loss": 103.6343, "step": 4495, "task_loss": 2.98004412651062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9358377008379933, "compression/movement_sparsity/importance_threshold": -0.0004563927325522679, "compression/movement_sparsity/linear_layer_sparsity": 0.8865458018726095, "compression/movement_sparsity/model_sparsity": 0.8560902262443929, "compression_loss": 99.54959869384766, "distillation_loss": 5.567374229431152, "epoch": 3.8, "learning_rate": 3.444162674931906e-05, "loss": 104.5328, "step": 4496, "task_loss": 2.494520664215088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9359729691683143, "compression/movement_sparsity/importance_threshold": -0.0004554305556398247, "compression/movement_sparsity/linear_layer_sparsity": 0.8867410243451438, "compression/movement_sparsity/model_sparsity": 0.8562787422244211, "compression_loss": 99.56343841552734, "distillation_loss": 3.7882282733917236, "epoch": 3.8, "learning_rate": 3.443693059077675e-05, "loss": 104.0657, "step": 4497, "task_loss": 1.9319944381713867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9361080472478505, "compression/movement_sparsity/importance_threshold": -0.0004544697319998894, "compression/movement_sparsity/linear_layer_sparsity": 0.8868692091472293, "compression/movement_sparsity/model_sparsity": 0.8564025234842124, "compression_loss": 99.57719421386719, "distillation_loss": 5.9759626388549805, "epoch": 3.8, "learning_rate": 3.443223443223443e-05, "loss": 103.9692, "step": 4498, "task_loss": 2.9346258640289307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.936242935210487, "compression/movement_sparsity/importance_threshold": -0.0004535102606801258, "compression/movement_sparsity/linear_layer_sparsity": 0.8869502934871532, "compression/movement_sparsity/model_sparsity": 0.8564808223276152, "compression_loss": 99.59085083007812, "distillation_loss": 5.530453681945801, "epoch": 3.8, "learning_rate": 3.442753827369212e-05, "loss": 104.4224, "step": 4499, "task_loss": 1.9301621913909912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9363776331901089, "compression/movement_sparsity/importance_threshold": -0.00045255214072819315, "compression/movement_sparsity/linear_layer_sparsity": 0.887037995740115, "compression/movement_sparsity/model_sparsity": 0.8565655117383841, "compression_loss": 99.60456085205078, "distillation_loss": 3.527066230773926, "epoch": 3.8, "learning_rate": 3.442284211514981e-05, "loss": 103.798, "step": 4500, "task_loss": 1.6852211952209473 }, { "epoch": 3.8, "eval_accuracy": 0.5634059405940594, "eval_loss": 103.44270324707031, "eval_runtime": 227.1107, "eval_samples_per_second": 111.179, "eval_steps_per_second": 0.872, "step": 4500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9365121413206015, "compression/movement_sparsity/importance_threshold": -0.0004515953711917561, "compression/movement_sparsity/linear_layer_sparsity": 0.887085346609797, "compression/movement_sparsity/model_sparsity": 0.8566112359600242, "compression_loss": 99.61823272705078, "distillation_loss": 4.633009910583496, "epoch": 3.8, "learning_rate": 3.441814595660749e-05, "loss": 104.6206, "step": 4501, "task_loss": 2.1609108448028564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9366464597358498, "compression/movement_sparsity/importance_threshold": -0.00045063995111847485, "compression/movement_sparsity/linear_layer_sparsity": 0.8871514661193378, "compression/movement_sparsity/model_sparsity": 0.856675084061005, "compression_loss": 99.63186645507812, "distillation_loss": 3.6572651863098145, "epoch": 3.81, "learning_rate": 3.4413449798065185e-05, "loss": 103.499, "step": 4502, "task_loss": 2.6906683444976807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.936780588569739, "compression/movement_sparsity/importance_threshold": -0.00044968587955601137, "compression/movement_sparsity/linear_layer_sparsity": 0.8872659262044745, "compression/movement_sparsity/model_sparsity": 0.8567856120900967, "compression_loss": 99.64546966552734, "distillation_loss": 4.289264678955078, "epoch": 3.81, "learning_rate": 3.440875363952287e-05, "loss": 103.4218, "step": 4503, "task_loss": 2.8200807571411133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9369145279561542, "compression/movement_sparsity/importance_threshold": -0.0004487331555520285, "compression/movement_sparsity/linear_layer_sparsity": 0.887465858723225, "compression/movement_sparsity/model_sparsity": 0.8569786763117638, "compression_loss": 99.65912628173828, "distillation_loss": 4.1431732177734375, "epoch": 3.81, "learning_rate": 3.4404057480980565e-05, "loss": 103.5824, "step": 4504, "task_loss": 1.8430571556091309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9370482780289806, "compression/movement_sparsity/importance_threshold": -0.0004477817781541856, "compression/movement_sparsity/linear_layer_sparsity": 0.8875557192505289, "compression/movement_sparsity/model_sparsity": 0.8570654498535115, "compression_loss": 99.6727294921875, "distillation_loss": 4.340119361877441, "epoch": 3.81, "learning_rate": 3.4399361322438244e-05, "loss": 103.4947, "step": 4505, "task_loss": 2.1876487731933594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9371818389221033, "compression/movement_sparsity/importance_threshold": -0.00044683174641014806, "compression/movement_sparsity/linear_layer_sparsity": 0.8876329163118034, "compression/movement_sparsity/model_sparsity": 0.8571399949582453, "compression_loss": 99.6863021850586, "distillation_loss": 4.543083190917969, "epoch": 3.81, "learning_rate": 3.439466516389593e-05, "loss": 104.0484, "step": 4506, "task_loss": 2.56125807762146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9373152107694074, "compression/movement_sparsity/importance_threshold": -0.0004458830593675753, "compression/movement_sparsity/linear_layer_sparsity": 0.8876961024761059, "compression/movement_sparsity/model_sparsity": 0.8572010104834207, "compression_loss": 99.6998291015625, "distillation_loss": 5.941764831542969, "epoch": 3.81, "learning_rate": 3.4389969005353624e-05, "loss": 104.5815, "step": 4507, "task_loss": 3.3477067947387695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9374483937047781, "compression/movement_sparsity/importance_threshold": -0.00044493571607413015, "compression/movement_sparsity/linear_layer_sparsity": 0.8877532192390817, "compression/movement_sparsity/model_sparsity": 0.8572561651098765, "compression_loss": 99.71339416503906, "distillation_loss": 3.714327335357666, "epoch": 3.81, "learning_rate": 3.438527284681131e-05, "loss": 104.3216, "step": 4508, "task_loss": 1.895623803138733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9375813878621005, "compression/movement_sparsity/importance_threshold": -0.0004439897155774728, "compression/movement_sparsity/linear_layer_sparsity": 0.8877735618690684, "compression/movement_sparsity/model_sparsity": 0.857275808907942, "compression_loss": 99.72688293457031, "distillation_loss": 5.276263236999512, "epoch": 3.81, "learning_rate": 3.4380576688268997e-05, "loss": 103.8953, "step": 4509, "task_loss": 2.8172950744628906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9377141933752599, "compression/movement_sparsity/importance_threshold": -0.00044304505692526787, "compression/movement_sparsity/linear_layer_sparsity": 0.887787036178497, "compression/movement_sparsity/model_sparsity": 0.8572888203333898, "compression_loss": 99.74038696289062, "distillation_loss": 6.747661590576172, "epoch": 3.81, "learning_rate": 3.437588052972668e-05, "loss": 104.0794, "step": 4510, "task_loss": 3.11934494972229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9378468103781411, "compression/movement_sparsity/importance_threshold": -0.00044210173916517555, "compression/movement_sparsity/linear_layer_sparsity": 0.8877460766626678, "compression/movement_sparsity/model_sparsity": 0.8572492679029355, "compression_loss": 99.75384521484375, "distillation_loss": 4.707151889801025, "epoch": 3.81, "learning_rate": 3.4371184371184376e-05, "loss": 104.2114, "step": 4511, "task_loss": 2.785715103149414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9379792390046297, "compression/movement_sparsity/importance_threshold": -0.00044115976134485695, "compression/movement_sparsity/linear_layer_sparsity": 0.8878129116222668, "compression/movement_sparsity/model_sparsity": 0.857313806876064, "compression_loss": 99.76728057861328, "distillation_loss": 3.4344329833984375, "epoch": 3.81, "learning_rate": 3.436648821264206e-05, "loss": 103.8644, "step": 4512, "task_loss": 2.864198923110962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9381114793886104, "compression/movement_sparsity/importance_threshold": -0.00044021912251197576, "compression/movement_sparsity/linear_layer_sparsity": 0.8878130547122784, "compression/movement_sparsity/model_sparsity": 0.8573139450504935, "compression_loss": 99.78073120117188, "distillation_loss": 6.648244857788086, "epoch": 3.81, "learning_rate": 3.436179205409975e-05, "loss": 104.2787, "step": 4513, "task_loss": 3.1629021167755127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9382435316639686, "compression/movement_sparsity/importance_threshold": -0.0004392798217141931, "compression/movement_sparsity/linear_layer_sparsity": 0.8878176812893211, "compression/movement_sparsity/model_sparsity": 0.8573184126903818, "compression_loss": 99.79414367675781, "distillation_loss": 4.511252403259277, "epoch": 3.82, "learning_rate": 3.4357095895557435e-05, "loss": 104.5738, "step": 4514, "task_loss": 1.9913476705551147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9383753959645893, "compression/movement_sparsity/importance_threshold": -0.00043834185799917005, "compression/movement_sparsity/linear_layer_sparsity": 0.8878042785248984, "compression/movement_sparsity/model_sparsity": 0.8573054703521487, "compression_loss": 99.80755615234375, "distillation_loss": 4.036492347717285, "epoch": 3.82, "learning_rate": 3.435239973701512e-05, "loss": 104.0165, "step": 4515, "task_loss": 2.268681526184082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9385070724243578, "compression/movement_sparsity/importance_threshold": -0.0004374052304145686, "compression/movement_sparsity/linear_layer_sparsity": 0.8878259924341634, "compression/movement_sparsity/model_sparsity": 0.8573264383218305, "compression_loss": 99.82096099853516, "distillation_loss": 4.047816753387451, "epoch": 3.82, "learning_rate": 3.4347703578472815e-05, "loss": 104.1688, "step": 4516, "task_loss": 1.827858567237854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9386385611771592, "compression/movement_sparsity/importance_threshold": -0.0004364699380080524, "compression/movement_sparsity/linear_layer_sparsity": 0.8879056577981386, "compression/movement_sparsity/model_sparsity": 0.8574033669354739, "compression_loss": 99.8343276977539, "distillation_loss": 4.498531818389893, "epoch": 3.82, "learning_rate": 3.43430074199305e-05, "loss": 104.5498, "step": 4517, "task_loss": 2.109300374984741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9387698623568784, "compression/movement_sparsity/importance_threshold": -0.0004355359798272817, "compression/movement_sparsity/linear_layer_sparsity": 0.8879606759076104, "compression/movement_sparsity/model_sparsity": 0.8574564950036299, "compression_loss": 99.8476333618164, "distillation_loss": 5.6322407722473145, "epoch": 3.82, "learning_rate": 3.433831126138819e-05, "loss": 105.1365, "step": 4518, "task_loss": 2.7231855392456055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9389009760974009, "compression/movement_sparsity/importance_threshold": -0.00043460335491991766, "compression/movement_sparsity/linear_layer_sparsity": 0.8880234566502132, "compression/movement_sparsity/model_sparsity": 0.8575171190345882, "compression_loss": 99.8609848022461, "distillation_loss": 2.271437644958496, "epoch": 3.82, "learning_rate": 3.4333615102845874e-05, "loss": 103.8392, "step": 4519, "task_loss": 1.0792218446731567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9390319025326116, "compression/movement_sparsity/importance_threshold": -0.0004336720623336239, "compression/movement_sparsity/linear_layer_sparsity": 0.888096849902012, "compression/movement_sparsity/model_sparsity": 0.8575879910024036, "compression_loss": 99.87425994873047, "distillation_loss": 4.555983543395996, "epoch": 3.82, "learning_rate": 3.432891894430356e-05, "loss": 103.7634, "step": 4520, "task_loss": 2.097593069076538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9391626417963957, "compression/movement_sparsity/importance_threshold": -0.00043274210111606155, "compression/movement_sparsity/linear_layer_sparsity": 0.8881397053604952, "compression/movement_sparsity/model_sparsity": 0.8576293742440492, "compression_loss": 99.88760375976562, "distillation_loss": 5.063635349273682, "epoch": 3.82, "learning_rate": 3.432422278576125e-05, "loss": 104.3344, "step": 4521, "task_loss": 2.753614902496338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9392931940226384, "compression/movement_sparsity/importance_threshold": -0.0004318134703148917, "compression/movement_sparsity/linear_layer_sparsity": 0.8882443160831647, "compression/movement_sparsity/model_sparsity": 0.8577303912665747, "compression_loss": 99.90081787109375, "distillation_loss": 4.448860168457031, "epoch": 3.82, "learning_rate": 3.431952662721893e-05, "loss": 103.5074, "step": 4522, "task_loss": 1.7623388767242432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9394235593452248, "compression/movement_sparsity/importance_threshold": -0.00043088616897777807, "compression/movement_sparsity/linear_layer_sparsity": 0.8883664672564264, "compression/movement_sparsity/model_sparsity": 0.8578483461712539, "compression_loss": 99.91413116455078, "distillation_loss": 4.56204891204834, "epoch": 3.82, "learning_rate": 3.4314830468676626e-05, "loss": 104.7802, "step": 4523, "task_loss": 2.3873403072357178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.93955373789804, "compression/movement_sparsity/importance_threshold": -0.00042996019615238086, "compression/movement_sparsity/linear_layer_sparsity": 0.8883180909083278, "compression/movement_sparsity/model_sparsity": 0.8578016316995355, "compression_loss": 99.92731475830078, "distillation_loss": 4.642655372619629, "epoch": 3.82, "learning_rate": 3.431013431013431e-05, "loss": 104.3709, "step": 4524, "task_loss": 3.2190146446228027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9396837298149692, "compression/movement_sparsity/importance_threshold": -0.00042903555088636205, "compression/movement_sparsity/linear_layer_sparsity": 0.8883510373835056, "compression/movement_sparsity/model_sparsity": 0.8578334463619358, "compression_loss": 99.94055938720703, "distillation_loss": 4.579089164733887, "epoch": 3.82, "learning_rate": 3.4305438151592e-05, "loss": 104.1976, "step": 4525, "task_loss": 2.2276077270507812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9398135352298975, "compression/movement_sparsity/importance_threshold": -0.0004281122322273836, "compression/movement_sparsity/linear_layer_sparsity": 0.8884089053690425, "compression/movement_sparsity/model_sparsity": 0.8578893264041467, "compression_loss": 99.95378875732422, "distillation_loss": 4.2005486488342285, "epoch": 3.83, "learning_rate": 3.4300741993049685e-05, "loss": 103.8489, "step": 4526, "task_loss": 2.0534112453460693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9399431542767102, "compression/movement_sparsity/importance_threshold": -0.0004271902392231075, "compression/movement_sparsity/linear_layer_sparsity": 0.8884716622633101, "compression/movement_sparsity/model_sparsity": 0.8579499274060334, "compression_loss": 99.96699523925781, "distillation_loss": 4.86107873916626, "epoch": 3.83, "learning_rate": 3.429604583450737e-05, "loss": 104.3527, "step": 4527, "task_loss": 3.007263660430908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9400725870892921, "compression/movement_sparsity/importance_threshold": -0.00042626957092119656, "compression/movement_sparsity/linear_layer_sparsity": 0.888514660811805, "compression/movement_sparsity/model_sparsity": 0.8579914488221084, "compression_loss": 99.98018646240234, "distillation_loss": 2.595142126083374, "epoch": 3.83, "learning_rate": 3.4291349675965064e-05, "loss": 103.8501, "step": 4528, "task_loss": 2.589855432510376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9402018338015287, "compression/movement_sparsity/importance_threshold": -0.000425350226369311, "compression/movement_sparsity/linear_layer_sparsity": 0.8886370385442518, "compression/movement_sparsity/model_sparsity": 0.8581096225029679, "compression_loss": 99.99340057373047, "distillation_loss": 4.938940048217773, "epoch": 3.83, "learning_rate": 3.428665351742275e-05, "loss": 104.3159, "step": 4529, "task_loss": 2.2029666900634766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9403308945473048, "compression/movement_sparsity/importance_threshold": -0.00042443220461511454, "compression/movement_sparsity/linear_layer_sparsity": 0.8887507474068275, "compression/movement_sparsity/model_sparsity": 0.8582194251163046, "compression_loss": 100.00654602050781, "distillation_loss": 3.2351438999176025, "epoch": 3.83, "learning_rate": 3.428195735888044e-05, "loss": 103.4455, "step": 4530, "task_loss": 2.007089614868164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9404597694605058, "compression/movement_sparsity/importance_threshold": -0.00042351550470626654, "compression/movement_sparsity/linear_layer_sparsity": 0.8888831175917531, "compression/movement_sparsity/model_sparsity": 0.8583472479781598, "compression_loss": 100.01962280273438, "distillation_loss": 4.232966423034668, "epoch": 3.83, "learning_rate": 3.427726120033812e-05, "loss": 104.5872, "step": 4531, "task_loss": 2.908407211303711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9405884586750167, "compression/movement_sparsity/importance_threshold": -0.00042260012569043156, "compression/movement_sparsity/linear_layer_sparsity": 0.8889271535428324, "compression/movement_sparsity/model_sparsity": 0.8583897711588491, "compression_loss": 100.0327377319336, "distillation_loss": 3.295498847961426, "epoch": 3.83, "learning_rate": 3.427256504179581e-05, "loss": 104.3928, "step": 4532, "task_loss": 1.364562749862671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9407169623247228, "compression/movement_sparsity/importance_threshold": -0.00042168606661526983, "compression/movement_sparsity/linear_layer_sparsity": 0.8889321855415747, "compression/movement_sparsity/model_sparsity": 0.8583946302929543, "compression_loss": 100.0457763671875, "distillation_loss": 4.457124710083008, "epoch": 3.83, "learning_rate": 3.42678688832535e-05, "loss": 104.4439, "step": 4533, "task_loss": 1.4483850002288818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9408452805435091, "compression/movement_sparsity/importance_threshold": -0.00042077332652844246, "compression/movement_sparsity/linear_layer_sparsity": 0.8890759075340897, "compression/movement_sparsity/model_sparsity": 0.8585334149928859, "compression_loss": 100.05886840820312, "distillation_loss": 3.0908942222595215, "epoch": 3.83, "learning_rate": 3.426317272471119e-05, "loss": 104.2352, "step": 4534, "task_loss": 1.897392749786377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9409734134652609, "compression/movement_sparsity/importance_threshold": -0.00041986190447761314, "compression/movement_sparsity/linear_layer_sparsity": 0.8891847036395993, "compression/movement_sparsity/model_sparsity": 0.8586384736174754, "compression_loss": 100.07186889648438, "distillation_loss": 5.28373908996582, "epoch": 3.83, "learning_rate": 3.4258476566168875e-05, "loss": 104.8839, "step": 4535, "task_loss": 3.3743138313293457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9411013612238631, "compression/movement_sparsity/importance_threshold": -0.00041895179951044297, "compression/movement_sparsity/linear_layer_sparsity": 0.8892533033760085, "compression/movement_sparsity/model_sparsity": 0.8587047167419013, "compression_loss": 100.08484649658203, "distillation_loss": 5.646077632904053, "epoch": 3.83, "learning_rate": 3.425378040762656e-05, "loss": 104.1765, "step": 4536, "task_loss": 2.158766746520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.941229123953201, "compression/movement_sparsity/importance_threshold": -0.00041804301067459304, "compression/movement_sparsity/linear_layer_sparsity": 0.8893515823656633, "compression/movement_sparsity/model_sparsity": 0.8587996195459199, "compression_loss": 100.09783935546875, "distillation_loss": 6.214035987854004, "epoch": 3.83, "learning_rate": 3.424908424908425e-05, "loss": 105.5323, "step": 4537, "task_loss": 2.1191725730895996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9413567017871597, "compression/movement_sparsity/importance_threshold": -0.00041713553701772706, "compression/movement_sparsity/linear_layer_sparsity": 0.8893792702829137, "compression/movement_sparsity/model_sparsity": 0.8588263562980348, "compression_loss": 100.11080169677734, "distillation_loss": 4.77935791015625, "epoch": 3.84, "learning_rate": 3.424438809054194e-05, "loss": 104.2481, "step": 4538, "task_loss": 2.666339159011841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9414840948596243, "compression/movement_sparsity/importance_threshold": -0.00041622937758750613, "compression/movement_sparsity/linear_layer_sparsity": 0.889379461069596, "compression/movement_sparsity/model_sparsity": 0.8588265405306076, "compression_loss": 100.12371826171875, "distillation_loss": 5.5806732177734375, "epoch": 3.84, "learning_rate": 3.423969193199962e-05, "loss": 104.7881, "step": 4539, "task_loss": 3.1689510345458984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9416113033044801, "compression/movement_sparsity/importance_threshold": -0.0004153245314315896, "compression/movement_sparsity/linear_layer_sparsity": 0.8894644923090073, "compression/movement_sparsity/model_sparsity": 0.8589086506853584, "compression_loss": 100.13667297363281, "distillation_loss": 5.465449333190918, "epoch": 3.84, "learning_rate": 3.4234995773457314e-05, "loss": 104.8909, "step": 4540, "task_loss": 3.9341466426849365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.941738327255612, "compression/movement_sparsity/importance_threshold": -0.00041442099759764293, "compression/movement_sparsity/linear_layer_sparsity": 0.8895228372612496, "compression/movement_sparsity/model_sparsity": 0.8589649913090011, "compression_loss": 100.14958953857422, "distillation_loss": 4.828695297241211, "epoch": 3.84, "learning_rate": 3.4230299614915e-05, "loss": 104.5341, "step": 4541, "task_loss": 3.2739431858062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9418651668469054, "compression/movement_sparsity/importance_threshold": -0.0004135187751333263, "compression/movement_sparsity/linear_layer_sparsity": 0.8896002131850387, "compression/movement_sparsity/model_sparsity": 0.8590397091317719, "compression_loss": 100.16249084472656, "distillation_loss": 4.202974796295166, "epoch": 3.84, "learning_rate": 3.422560345637269e-05, "loss": 104.5119, "step": 4542, "task_loss": 2.2653863430023193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9419918222122452, "compression/movement_sparsity/importance_threshold": -0.00041261786308630175, "compression/movement_sparsity/linear_layer_sparsity": 0.8896732964084789, "compression/movement_sparsity/model_sparsity": 0.8591102817216566, "compression_loss": 100.1754150390625, "distillation_loss": 5.321660995483398, "epoch": 3.84, "learning_rate": 3.422090729783038e-05, "loss": 104.8519, "step": 4543, "task_loss": 4.139326572418213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9421182934855167, "compression/movement_sparsity/importance_threshold": -0.0004117182605042303, "compression/movement_sparsity/linear_layer_sparsity": 0.8897846085133596, "compression/movement_sparsity/model_sparsity": 0.8592177699132987, "compression_loss": 100.18827819824219, "distillation_loss": 4.430062294006348, "epoch": 3.84, "learning_rate": 3.421621113928806e-05, "loss": 104.7694, "step": 4544, "task_loss": 2.524355173110962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9422445808006049, "compression/movement_sparsity/importance_threshold": -0.0004108199664347757, "compression/movement_sparsity/linear_layer_sparsity": 0.8898272731851607, "compression/movement_sparsity/model_sparsity": 0.8592589689223715, "compression_loss": 100.2011489868164, "distillation_loss": 4.069216728210449, "epoch": 3.84, "learning_rate": 3.421151498074575e-05, "loss": 104.9836, "step": 4545, "task_loss": 1.2135066986083984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9423706842913951, "compression/movement_sparsity/importance_threshold": -0.0004099229799255982, "compression/movement_sparsity/linear_layer_sparsity": 0.8899512010594003, "compression/movement_sparsity/model_sparsity": 0.8593786394928842, "compression_loss": 100.21400451660156, "distillation_loss": 3.9902749061584473, "epoch": 3.84, "learning_rate": 3.420681882220344e-05, "loss": 105.2269, "step": 4546, "task_loss": 2.899770736694336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9424966040917724, "compression/movement_sparsity/importance_threshold": -0.00040902730002436057, "compression/movement_sparsity/linear_layer_sparsity": 0.8900578508147354, "compression/movement_sparsity/model_sparsity": 0.8594816255010306, "compression_loss": 100.22686004638672, "distillation_loss": 6.624506950378418, "epoch": 3.84, "learning_rate": 3.420212266366113e-05, "loss": 105.6269, "step": 4547, "task_loss": 3.1837403774261475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9426223403356219, "compression/movement_sparsity/importance_threshold": -0.00040813292577872393, "compression/movement_sparsity/linear_layer_sparsity": 0.8900931582751053, "compression/movement_sparsity/model_sparsity": 0.8595157200415182, "compression_loss": 100.23970031738281, "distillation_loss": 4.634982109069824, "epoch": 3.84, "learning_rate": 3.419742650511881e-05, "loss": 104.3597, "step": 4548, "task_loss": 2.8498611450195312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9427478931568287, "compression/movement_sparsity/importance_threshold": -0.00040723985623635113, "compression/movement_sparsity/linear_layer_sparsity": 0.8901200591972918, "compression/movement_sparsity/model_sparsity": 0.8595416968342707, "compression_loss": 100.25247955322266, "distillation_loss": 3.097057819366455, "epoch": 3.84, "learning_rate": 3.4192730346576504e-05, "loss": 104.1156, "step": 4549, "task_loss": 1.8950320482254028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9428732626892781, "compression/movement_sparsity/importance_threshold": -0.0004063480904449024, "compression/movement_sparsity/linear_layer_sparsity": 0.8901229448458596, "compression/movement_sparsity/model_sparsity": 0.859544483351933, "compression_loss": 100.26525115966797, "distillation_loss": 5.633776664733887, "epoch": 3.85, "learning_rate": 3.418803418803419e-05, "loss": 104.5734, "step": 4550, "task_loss": 1.7653906345367432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9429984490668549, "compression/movement_sparsity/importance_threshold": -0.0004054576274520414, "compression/movement_sparsity/linear_layer_sparsity": 0.890148080991236, "compression/movement_sparsity/model_sparsity": 0.8595687559933879, "compression_loss": 100.2780532836914, "distillation_loss": 3.4308760166168213, "epoch": 3.85, "learning_rate": 3.418333802949188e-05, "loss": 104.6589, "step": 4551, "task_loss": 1.7538673877716064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9431234524234448, "compression/movement_sparsity/importance_threshold": -0.0004045684663054284, "compression/movement_sparsity/linear_layer_sparsity": 0.8901426554949617, "compression/movement_sparsity/model_sparsity": 0.8595635168796013, "compression_loss": 100.290771484375, "distillation_loss": 4.776997089385986, "epoch": 3.85, "learning_rate": 3.4178641870949563e-05, "loss": 104.467, "step": 4552, "task_loss": 2.6164679527282715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9432482728929324, "compression/movement_sparsity/importance_threshold": -0.00040368060605272624, "compression/movement_sparsity/linear_layer_sparsity": 0.8902628988014017, "compression/movement_sparsity/model_sparsity": 0.8596796294585535, "compression_loss": 100.30347442626953, "distillation_loss": 4.225254058837891, "epoch": 3.85, "learning_rate": 3.417394571240725e-05, "loss": 105.3157, "step": 4553, "task_loss": 2.4298348426818848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.943372910609203, "compression/movement_sparsity/importance_threshold": -0.00040279404574159686, "compression/movement_sparsity/linear_layer_sparsity": 0.8902189463194959, "compression/movement_sparsity/model_sparsity": 0.8596371868796148, "compression_loss": 100.31616973876953, "distillation_loss": 5.482378959655762, "epoch": 3.85, "learning_rate": 3.416924955386494e-05, "loss": 105.0514, "step": 4554, "task_loss": 1.8625503778457642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.943497365706142, "compression/movement_sparsity/importance_threshold": -0.00040190878441970137, "compression/movement_sparsity/linear_layer_sparsity": 0.8903223050045637, "compression/movement_sparsity/model_sparsity": 0.8597369948758818, "compression_loss": 100.32884216308594, "distillation_loss": 3.597015142440796, "epoch": 3.85, "learning_rate": 3.416455339532263e-05, "loss": 104.4333, "step": 4555, "task_loss": 2.8146142959594727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9436216383176342, "compression/movement_sparsity/importance_threshold": -0.0004010248211347026, "compression/movement_sparsity/linear_layer_sparsity": 0.890405106424627, "compression/movement_sparsity/model_sparsity": 0.8598169518124392, "compression_loss": 100.34146881103516, "distillation_loss": 3.5693109035491943, "epoch": 3.85, "learning_rate": 3.4159857236780316e-05, "loss": 103.6696, "step": 4556, "task_loss": 1.8460865020751953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9437457285775649, "compression/movement_sparsity/importance_threshold": -0.00040014215493426163, "compression/movement_sparsity/linear_layer_sparsity": 0.8904108181009246, "compression/movement_sparsity/model_sparsity": 0.8598224672750847, "compression_loss": 100.35403442382812, "distillation_loss": 4.271231174468994, "epoch": 3.85, "learning_rate": 3.4155161078238e-05, "loss": 104.6954, "step": 4557, "task_loss": 2.153414249420166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9438696366198193, "compression/movement_sparsity/importance_threshold": -0.0003992607848660396, "compression/movement_sparsity/linear_layer_sparsity": 0.8904680183330739, "compression/movement_sparsity/model_sparsity": 0.8598777025032912, "compression_loss": 100.3666000366211, "distillation_loss": 5.292201995849609, "epoch": 3.85, "learning_rate": 3.415046491969569e-05, "loss": 105.5312, "step": 4558, "task_loss": 3.2284302711486816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9439933625782824, "compression/movement_sparsity/importance_threshold": -0.00039838070997770016, "compression/movement_sparsity/linear_layer_sparsity": 0.8905034092626171, "compression/movement_sparsity/model_sparsity": 0.8599118776455293, "compression_loss": 100.3791732788086, "distillation_loss": 5.360626697540283, "epoch": 3.85, "learning_rate": 3.414576876115338e-05, "loss": 105.4232, "step": 4559, "task_loss": 2.5087661743164062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9441169065868393, "compression/movement_sparsity/importance_threshold": -0.00039750192931690444, "compression/movement_sparsity/linear_layer_sparsity": 0.8905948437800489, "compression/movement_sparsity/model_sparsity": 0.8600001711060019, "compression_loss": 100.3917007446289, "distillation_loss": 4.536935806274414, "epoch": 3.85, "learning_rate": 3.414107260261107e-05, "loss": 104.6452, "step": 4560, "task_loss": 2.531367301940918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9442402687793753, "compression/movement_sparsity/importance_threshold": -0.0003966244419313144, "compression/movement_sparsity/linear_layer_sparsity": 0.8906705026236984, "compression/movement_sparsity/model_sparsity": 0.8600732308356183, "compression_loss": 100.40424346923828, "distillation_loss": 3.380441665649414, "epoch": 3.85, "learning_rate": 3.4136376444068754e-05, "loss": 103.7923, "step": 4561, "task_loss": 1.1174545288085938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9443634492897756, "compression/movement_sparsity/importance_threshold": -0.00039574824686859026, "compression/movement_sparsity/linear_layer_sparsity": 0.8907023401512862, "compression/movement_sparsity/model_sparsity": 0.8601039746461897, "compression_loss": 100.41670227050781, "distillation_loss": 4.01112174987793, "epoch": 3.86, "learning_rate": 3.413168028552644e-05, "loss": 104.449, "step": 4562, "task_loss": 2.2067816257476807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9444864482519251, "compression/movement_sparsity/importance_threshold": -0.00039487334317639573, "compression/movement_sparsity/linear_layer_sparsity": 0.8907414037244613, "compression/movement_sparsity/model_sparsity": 0.8601416962654526, "compression_loss": 100.4291763305664, "distillation_loss": 4.612224102020264, "epoch": 3.86, "learning_rate": 3.412698412698413e-05, "loss": 104.4622, "step": 4563, "task_loss": 1.8558109998703003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.944609265799709, "compression/movement_sparsity/importance_threshold": -0.0003939997299023928, "compression/movement_sparsity/linear_layer_sparsity": 0.8908399092733011, "compression/movement_sparsity/model_sparsity": 0.8602368178456513, "compression_loss": 100.44158935546875, "distillation_loss": 4.044341564178467, "epoch": 3.86, "learning_rate": 3.412228796844182e-05, "loss": 105.2165, "step": 4564, "task_loss": 2.6036384105682373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9447319020670126, "compression/movement_sparsity/importance_threshold": -0.0003931274060942425, "compression/movement_sparsity/linear_layer_sparsity": 0.8908943669468942, "compression/movement_sparsity/model_sparsity": 0.860289404730625, "compression_loss": 100.4540023803711, "distillation_loss": 5.09814453125, "epoch": 3.86, "learning_rate": 3.41175918098995e-05, "loss": 104.9898, "step": 4565, "task_loss": 1.5031282901763916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9448543571877209, "compression/movement_sparsity/importance_threshold": -0.00039225637079960685, "compression/movement_sparsity/linear_layer_sparsity": 0.8909707412406018, "compression/movement_sparsity/model_sparsity": 0.860363155332389, "compression_loss": 100.46635437011719, "distillation_loss": 5.171000957489014, "epoch": 3.86, "learning_rate": 3.411289565135719e-05, "loss": 104.6933, "step": 4566, "task_loss": 2.509242534637451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9449766312957192, "compression/movement_sparsity/importance_threshold": -0.00039138662306614693, "compression/movement_sparsity/linear_layer_sparsity": 0.8909853006492853, "compression/movement_sparsity/model_sparsity": 0.8603772145805941, "compression_loss": 100.47877502441406, "distillation_loss": 5.340817451477051, "epoch": 3.86, "learning_rate": 3.410819949281488e-05, "loss": 104.7132, "step": 4567, "task_loss": 3.4580533504486084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9450987245248924, "compression/movement_sparsity/importance_threshold": -0.0003905181619415256, "compression/movement_sparsity/linear_layer_sparsity": 0.8910940609822919, "compression/movement_sparsity/model_sparsity": 0.8604822386615761, "compression_loss": 100.49110412597656, "distillation_loss": 4.298709869384766, "epoch": 3.86, "learning_rate": 3.4103503334272565e-05, "loss": 104.4083, "step": 4568, "task_loss": 2.1279351711273193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9452206370091258, "compression/movement_sparsity/importance_threshold": -0.00038965098647340476, "compression/movement_sparsity/linear_layer_sparsity": 0.8911624460836837, "compression/movement_sparsity/model_sparsity": 0.8605482745243579, "compression_loss": 100.50346374511719, "distillation_loss": 4.461252212524414, "epoch": 3.86, "learning_rate": 3.409880717573025e-05, "loss": 104.2246, "step": 4569, "task_loss": 1.4516161680221558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9453423688823045, "compression/movement_sparsity/importance_threshold": -0.00038878509570944644, "compression/movement_sparsity/linear_layer_sparsity": 0.8912842156835811, "compression/movement_sparsity/model_sparsity": 0.8606658609638917, "compression_loss": 100.5157699584961, "distillation_loss": 3.7760863304138184, "epoch": 3.86, "learning_rate": 3.409411101718794e-05, "loss": 104.5311, "step": 4570, "task_loss": 2.078402280807495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9454639202783137, "compression/movement_sparsity/importance_threshold": -0.0003879204886973117, "compression/movement_sparsity/linear_layer_sparsity": 0.8913364435378262, "compression/movement_sparsity/model_sparsity": 0.8607162946306718, "compression_loss": 100.52815246582031, "distillation_loss": 4.34738826751709, "epoch": 3.86, "learning_rate": 3.408941485864563e-05, "loss": 104.8777, "step": 4571, "task_loss": 3.355710029602051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9455852913310384, "compression/movement_sparsity/importance_threshold": -0.00038705716448466255, "compression/movement_sparsity/linear_layer_sparsity": 0.8914691833719486, "compression/movement_sparsity/model_sparsity": 0.8608444744431366, "compression_loss": 100.54048156738281, "distillation_loss": 4.624619483947754, "epoch": 3.86, "learning_rate": 3.408471870010332e-05, "loss": 105.136, "step": 4572, "task_loss": 2.075629711151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9457064821743638, "compression/movement_sparsity/importance_threshold": -0.00038619512211916177, "compression/movement_sparsity/linear_layer_sparsity": 0.891571373488588, "compression/movement_sparsity/model_sparsity": 0.8609431540148959, "compression_loss": 100.5527572631836, "distillation_loss": 3.1272497177124023, "epoch": 3.87, "learning_rate": 3.408002254156101e-05, "loss": 104.8905, "step": 4573, "task_loss": 2.2421083450317383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9458274929421752, "compression/movement_sparsity/importance_threshold": -0.0003853343606484696, "compression/movement_sparsity/linear_layer_sparsity": 0.891640235556685, "compression/movement_sparsity/model_sparsity": 0.8610096504591093, "compression_loss": 100.56505584716797, "distillation_loss": 3.5168943405151367, "epoch": 3.87, "learning_rate": 3.407532638301869e-05, "loss": 104.9081, "step": 4574, "task_loss": 1.3668253421783447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9459483237683575, "compression/movement_sparsity/importance_threshold": -0.0003844748791202498, "compression/movement_sparsity/linear_layer_sparsity": 0.8917834328858241, "compression/movement_sparsity/model_sparsity": 0.861147928519466, "compression_loss": 100.5772933959961, "distillation_loss": 4.796130657196045, "epoch": 3.87, "learning_rate": 3.407063022447638e-05, "loss": 105.1665, "step": 4575, "task_loss": 2.977069854736328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.946068974786796, "compression/movement_sparsity/importance_threshold": -0.00038361667658216164, "compression/movement_sparsity/linear_layer_sparsity": 0.8918205647438422, "compression/movement_sparsity/model_sparsity": 0.8611837847839301, "compression_loss": 100.58955383300781, "distillation_loss": 4.873356819152832, "epoch": 3.87, "learning_rate": 3.406593406593407e-05, "loss": 104.8599, "step": 4576, "task_loss": 2.404142379760742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9461894461313758, "compression/movement_sparsity/importance_threshold": -0.00038275975208186975, "compression/movement_sparsity/linear_layer_sparsity": 0.8917889537754395, "compression/movement_sparsity/model_sparsity": 0.8611532597495388, "compression_loss": 100.60177612304688, "distillation_loss": 5.337143421173096, "epoch": 3.87, "learning_rate": 3.4061237907391756e-05, "loss": 104.757, "step": 4577, "task_loss": 3.4645745754241943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.946309737935982, "compression/movement_sparsity/importance_threshold": -0.00038190410466703347, "compression/movement_sparsity/linear_layer_sparsity": 0.8918329539540159, "compression/movement_sparsity/model_sparsity": 0.8611957483866207, "compression_loss": 100.61396789550781, "distillation_loss": 4.309157848358154, "epoch": 3.87, "learning_rate": 3.405654174884944e-05, "loss": 105.3081, "step": 4578, "task_loss": 2.032681941986084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9464298503344998, "compression/movement_sparsity/importance_threshold": -0.0003810497333853174, "compression/movement_sparsity/linear_layer_sparsity": 0.8918268607043539, "compression/movement_sparsity/model_sparsity": 0.8611898644588296, "compression_loss": 100.62615966796875, "distillation_loss": 3.962210178375244, "epoch": 3.87, "learning_rate": 3.405184559030713e-05, "loss": 105.0772, "step": 4579, "task_loss": 1.802172064781189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9465497834608143, "compression/movement_sparsity/importance_threshold": -0.00038019663728438083, "compression/movement_sparsity/linear_layer_sparsity": 0.8920256365788437, "compression/movement_sparsity/model_sparsity": 0.8613818117705246, "compression_loss": 100.63836669921875, "distillation_loss": 2.938237428665161, "epoch": 3.87, "learning_rate": 3.404714943176482e-05, "loss": 104.7827, "step": 4580, "task_loss": 1.2365801334381104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9466695374488105, "compression/movement_sparsity/importance_threshold": -0.0003793448154118875, "compression/movement_sparsity/linear_layer_sparsity": 0.8920866644688041, "compression/movement_sparsity/model_sparsity": 0.8614407431647211, "compression_loss": 100.65054321289062, "distillation_loss": 5.981217861175537, "epoch": 3.87, "learning_rate": 3.404245327322251e-05, "loss": 105.3095, "step": 4581, "task_loss": 3.1565239429473877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.946789112432374, "compression/movement_sparsity/importance_threshold": -0.0003784942668154977, "compression/movement_sparsity/linear_layer_sparsity": 0.8921616794074013, "compression/movement_sparsity/model_sparsity": 0.8615131811094046, "compression_loss": 100.66270446777344, "distillation_loss": 2.5796613693237305, "epoch": 3.87, "learning_rate": 3.4037757114680194e-05, "loss": 104.2884, "step": 4582, "task_loss": 1.917545199394226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9469085085453894, "compression/movement_sparsity/importance_threshold": -0.0003776449905428742, "compression/movement_sparsity/linear_layer_sparsity": 0.8922073012727761, "compression/movement_sparsity/model_sparsity": 0.8615572357233545, "compression_loss": 100.67484283447266, "distillation_loss": 5.771102428436279, "epoch": 3.87, "learning_rate": 3.403306095613788e-05, "loss": 105.0305, "step": 4583, "task_loss": 2.3716957569122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9470277259217422, "compression/movement_sparsity/importance_threshold": -0.00037679698564167894, "compression/movement_sparsity/linear_layer_sparsity": 0.8922457924859046, "compression/movement_sparsity/model_sparsity": 0.8615944046448992, "compression_loss": 100.68695831298828, "distillation_loss": 3.98105525970459, "epoch": 3.87, "learning_rate": 3.402836479759557e-05, "loss": 104.7921, "step": 4584, "task_loss": 2.2720730304718018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9471467646953173, "compression/movement_sparsity/importance_threshold": -0.00037595025115957394, "compression/movement_sparsity/linear_layer_sparsity": 0.8922821015763559, "compression/movement_sparsity/model_sparsity": 0.8616294664063936, "compression_loss": 100.69908905029297, "distillation_loss": 4.63734769821167, "epoch": 3.88, "learning_rate": 3.402366863905326e-05, "loss": 104.9176, "step": 4585, "task_loss": 3.6042864322662354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.947265625, "compression/movement_sparsity/importance_threshold": -0.00037510478614422027, "compression/movement_sparsity/linear_layer_sparsity": 0.8923226795188207, "compression/movement_sparsity/model_sparsity": 0.8616686503717025, "compression_loss": 100.71106719970703, "distillation_loss": 5.726658821105957, "epoch": 3.88, "learning_rate": 3.401897248051094e-05, "loss": 105.5327, "step": 4586, "task_loss": 2.7858784198760986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9473843069696755, "compression/movement_sparsity/importance_threshold": -0.0003742605896432799, "compression/movement_sparsity/linear_layer_sparsity": 0.8923548389989346, "compression/movement_sparsity/model_sparsity": 0.8616997050747404, "compression_loss": 100.72312927246094, "distillation_loss": 3.79140305519104, "epoch": 3.88, "learning_rate": 3.401427632196863e-05, "loss": 104.7183, "step": 4587, "task_loss": 1.4985935688018799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9475028107382286, "compression/movement_sparsity/importance_threshold": -0.0003734176607044157, "compression/movement_sparsity/linear_layer_sparsity": 0.8924193368216771, "compression/movement_sparsity/model_sparsity": 0.861761987198853, "compression_loss": 100.73511505126953, "distillation_loss": 3.912199020385742, "epoch": 3.88, "learning_rate": 3.400958016342632e-05, "loss": 104.9501, "step": 4588, "task_loss": 1.9237053394317627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9476211364395448, "compression/movement_sparsity/importance_threshold": -0.0003725759983752878, "compression/movement_sparsity/linear_layer_sparsity": 0.892471862780113, "compression/movement_sparsity/model_sparsity": 0.8618127087290279, "compression_loss": 100.74717712402344, "distillation_loss": 4.4009175300598145, "epoch": 3.88, "learning_rate": 3.4004884004884005e-05, "loss": 105.7296, "step": 4589, "task_loss": 3.0086300373077393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9477392842075092, "compression/movement_sparsity/importance_threshold": -0.00037173560170355915, "compression/movement_sparsity/linear_layer_sparsity": 0.8925943120575657, "compression/movement_sparsity/model_sparsity": 0.8619309514971021, "compression_loss": 100.75917053222656, "distillation_loss": 4.278220176696777, "epoch": 3.88, "learning_rate": 3.40001878463417e-05, "loss": 105.0762, "step": 4590, "task_loss": 2.840219020843506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9478572541760069, "compression/movement_sparsity/importance_threshold": -0.00037089646973689164, "compression/movement_sparsity/linear_layer_sparsity": 0.8926554353408671, "compression/movement_sparsity/model_sparsity": 0.8619899750075849, "compression_loss": 100.77104187011719, "distillation_loss": 4.455009460449219, "epoch": 3.88, "learning_rate": 3.399549168779938e-05, "loss": 104.7243, "step": 4591, "task_loss": 3.364497661590576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9479750464789227, "compression/movement_sparsity/importance_threshold": -0.00037005860152294727, "compression/movement_sparsity/linear_layer_sparsity": 0.8927472395074957, "compression/movement_sparsity/model_sparsity": 0.8620786254186671, "compression_loss": 100.78308868408203, "distillation_loss": 7.203469276428223, "epoch": 3.88, "learning_rate": 3.399079552925707e-05, "loss": 105.5137, "step": 4592, "task_loss": 4.196785926818848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9480926612501424, "compression/movement_sparsity/importance_threshold": -0.0003692219961093871, "compression/movement_sparsity/linear_layer_sparsity": 0.8928500258325167, "compression/movement_sparsity/model_sparsity": 0.862177880717216, "compression_loss": 100.79490661621094, "distillation_loss": 4.781920433044434, "epoch": 3.88, "learning_rate": 3.398609937071476e-05, "loss": 104.9357, "step": 4593, "task_loss": 2.7079966068267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9482100986235505, "compression/movement_sparsity/importance_threshold": -0.00036838665254387316, "compression/movement_sparsity/linear_layer_sparsity": 0.8928034977304016, "compression/movement_sparsity/model_sparsity": 0.8621329509985457, "compression_loss": 100.80677795410156, "distillation_loss": 6.724857330322266, "epoch": 3.88, "learning_rate": 3.3981403212172444e-05, "loss": 105.3067, "step": 4594, "task_loss": 4.460787296295166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9483273587330325, "compression/movement_sparsity/importance_threshold": -0.0003675525698740691, "compression/movement_sparsity/linear_layer_sparsity": 0.8929057832403821, "compression/movement_sparsity/model_sparsity": 0.8622317226865913, "compression_loss": 100.81868743896484, "distillation_loss": 3.7851667404174805, "epoch": 3.88, "learning_rate": 3.397670705363013e-05, "loss": 104.7908, "step": 4595, "task_loss": 2.0780036449432373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9484444417124736, "compression/movement_sparsity/importance_threshold": -0.0003667197471476334, "compression/movement_sparsity/linear_layer_sparsity": 0.8930157002176494, "compression/movement_sparsity/model_sparsity": 0.8623378636775454, "compression_loss": 100.83049774169922, "distillation_loss": 4.057870864868164, "epoch": 3.88, "learning_rate": 3.3972010895087817e-05, "loss": 104.4433, "step": 4596, "task_loss": 3.5030157566070557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9485613476957586, "compression/movement_sparsity/importance_threshold": -0.0003658881834122307, "compression/movement_sparsity/linear_layer_sparsity": 0.8930584960552945, "compression/movement_sparsity/model_sparsity": 0.862379189346512, "compression_loss": 100.84231567382812, "distillation_loss": 4.159332275390625, "epoch": 3.89, "learning_rate": 3.396731473654551e-05, "loss": 105.1434, "step": 4597, "task_loss": 1.8776557445526123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.948678076816773, "compression/movement_sparsity/importance_threshold": -0.000365057877715522, "compression/movement_sparsity/linear_layer_sparsity": 0.8930711118246533, "compression/movement_sparsity/model_sparsity": 0.8623913717253826, "compression_loss": 100.85417175292969, "distillation_loss": 4.161245346069336, "epoch": 3.89, "learning_rate": 3.3962618578003196e-05, "loss": 105.5431, "step": 4598, "task_loss": 1.5879749059677124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9487946292094017, "compression/movement_sparsity/importance_threshold": -0.0003642288291051685, "compression/movement_sparsity/linear_layer_sparsity": 0.8931401646794326, "compression/movement_sparsity/model_sparsity": 0.8624580524021688, "compression_loss": 100.86585998535156, "distillation_loss": 4.046158790588379, "epoch": 3.89, "learning_rate": 3.395792241946088e-05, "loss": 104.6542, "step": 4599, "task_loss": 1.949242353439331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9489110050075299, "compression/movement_sparsity/importance_threshold": -0.0003634010366288338, "compression/movement_sparsity/linear_layer_sparsity": 0.8933049566761602, "compression/movement_sparsity/model_sparsity": 0.8626171832868493, "compression_loss": 100.87762451171875, "distillation_loss": 6.359089374542236, "epoch": 3.89, "learning_rate": 3.395322626091857e-05, "loss": 105.8183, "step": 4600, "task_loss": 2.788780450820923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9490272043450427, "compression/movement_sparsity/importance_threshold": -0.00036257449933417823, "compression/movement_sparsity/linear_layer_sparsity": 0.8933296397031665, "compression/movement_sparsity/model_sparsity": 0.8626410183759441, "compression_loss": 100.88935089111328, "distillation_loss": 4.825501918792725, "epoch": 3.89, "learning_rate": 3.3948530102376255e-05, "loss": 106.1083, "step": 4601, "task_loss": 3.0368785858154297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9491432273558256, "compression/movement_sparsity/importance_threshold": -0.00036174921626886195, "compression/movement_sparsity/linear_layer_sparsity": 0.8933096905707116, "compression/movement_sparsity/model_sparsity": 0.8626217545575597, "compression_loss": 100.90104675292969, "distillation_loss": 3.7299962043762207, "epoch": 3.89, "learning_rate": 3.394383394383395e-05, "loss": 104.5767, "step": 4602, "task_loss": 1.9180433750152588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9492590741737632, "compression/movement_sparsity/importance_threshold": -0.0003609251864805504, "compression/movement_sparsity/linear_layer_sparsity": 0.8934119641565245, "compression/movement_sparsity/model_sparsity": 0.8627205147310696, "compression_loss": 100.91278076171875, "distillation_loss": 4.031352996826172, "epoch": 3.89, "learning_rate": 3.393913778529163e-05, "loss": 104.7515, "step": 4603, "task_loss": 1.4369220733642578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9493747449327409, "compression/movement_sparsity/importance_threshold": -0.0003601024090169047, "compression/movement_sparsity/linear_layer_sparsity": 0.8934769508701399, "compression/movement_sparsity/model_sparsity": 0.8627832689511498, "compression_loss": 100.9244384765625, "distillation_loss": 3.235799789428711, "epoch": 3.89, "learning_rate": 3.393444162674932e-05, "loss": 105.0109, "step": 4604, "task_loss": 3.331874132156372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9494902397666438, "compression/movement_sparsity/importance_threshold": -0.0003592808829255851, "compression/movement_sparsity/linear_layer_sparsity": 0.8934913552646441, "compression/movement_sparsity/model_sparsity": 0.8627971785103896, "compression_loss": 100.93608093261719, "distillation_loss": 5.41348934173584, "epoch": 3.89, "learning_rate": 3.392974546820701e-05, "loss": 105.3568, "step": 4605, "task_loss": 3.0118823051452637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9496055588093572, "compression/movement_sparsity/importance_threshold": -0.0003584606072542544, "compression/movement_sparsity/linear_layer_sparsity": 0.8935467072508096, "compression/movement_sparsity/model_sparsity": 0.8628506289855479, "compression_loss": 100.94764709472656, "distillation_loss": 3.198028564453125, "epoch": 3.89, "learning_rate": 3.39250493096647e-05, "loss": 105.1729, "step": 4606, "task_loss": 2.731952428817749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.949720702194766, "compression/movement_sparsity/importance_threshold": -0.00035764158105057366, "compression/movement_sparsity/linear_layer_sparsity": 0.8935961686981633, "compression/movement_sparsity/model_sparsity": 0.8628983912800237, "compression_loss": 100.95922088623047, "distillation_loss": 5.662413120269775, "epoch": 3.89, "learning_rate": 3.392035315112239e-05, "loss": 105.5145, "step": 4607, "task_loss": 3.497105360031128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9498356700567555, "compression/movement_sparsity/importance_threshold": -0.0003568238033622058, "compression/movement_sparsity/linear_layer_sparsity": 0.8936306056942956, "compression/movement_sparsity/model_sparsity": 0.8629316452593983, "compression_loss": 100.9708251953125, "distillation_loss": 4.492818832397461, "epoch": 3.89, "learning_rate": 3.3915656992580066e-05, "loss": 104.9457, "step": 4608, "task_loss": 2.6285297870635986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9499504625292108, "compression/movement_sparsity/importance_threshold": -0.0003560072732368127, "compression/movement_sparsity/linear_layer_sparsity": 0.893641039340977, "compression/movement_sparsity/model_sparsity": 0.8629417204782185, "compression_loss": 100.98234558105469, "distillation_loss": 5.413853645324707, "epoch": 3.9, "learning_rate": 3.391096083403776e-05, "loss": 105.6403, "step": 4609, "task_loss": 2.450701951980591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.950065079746017, "compression/movement_sparsity/importance_threshold": -0.00035519198972205554, "compression/movement_sparsity/linear_layer_sparsity": 0.8936996943215778, "compression/movement_sparsity/model_sparsity": 0.8629983604797918, "compression_loss": 100.99392700195312, "distillation_loss": 3.021636724472046, "epoch": 3.9, "learning_rate": 3.3906264675495446e-05, "loss": 104.9728, "step": 4610, "task_loss": 1.3569285869598389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9501795218410594, "compression/movement_sparsity/importance_threshold": -0.0003543779518655945, "compression/movement_sparsity/linear_layer_sparsity": 0.8937094244423687, "compression/movement_sparsity/model_sparsity": 0.8630077563410001, "compression_loss": 101.00546264648438, "distillation_loss": 4.70419454574585, "epoch": 3.9, "learning_rate": 3.390156851695314e-05, "loss": 105.1903, "step": 4611, "task_loss": 3.1564643383026123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9502937889482228, "compression/movement_sparsity/importance_threshold": -0.0003535651587150959, "compression/movement_sparsity/linear_layer_sparsity": 0.8937685325413397, "compression/movement_sparsity/model_sparsity": 0.8630648338949337, "compression_loss": 101.01701354980469, "distillation_loss": 4.398809432983398, "epoch": 3.9, "learning_rate": 3.389687235841082e-05, "loss": 105.5672, "step": 4612, "task_loss": 2.8489561080932617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9504078812013926, "compression/movement_sparsity/importance_threshold": -0.00035275360931821733, "compression/movement_sparsity/linear_layer_sparsity": 0.8937631666659035, "compression/movement_sparsity/model_sparsity": 0.8630596523538262, "compression_loss": 101.02855682373047, "distillation_loss": 4.802545547485352, "epoch": 3.9, "learning_rate": 3.389217619986851e-05, "loss": 105.6374, "step": 4613, "task_loss": 3.2620913982391357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.950521798734454, "compression/movement_sparsity/importance_threshold": -0.0003519433027226234, "compression/movement_sparsity/linear_layer_sparsity": 0.8938960376658699, "compression/movement_sparsity/model_sparsity": 0.8631879588261847, "compression_loss": 101.04000854492188, "distillation_loss": 4.717199325561523, "epoch": 3.9, "learning_rate": 3.38874800413262e-05, "loss": 105.8617, "step": 4614, "task_loss": 2.635728359222412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9506355416812919, "compression/movement_sparsity/importance_threshold": -0.00035113423797597434, "compression/movement_sparsity/linear_layer_sparsity": 0.8939786244509159, "compression/movement_sparsity/model_sparsity": 0.8632677085010977, "compression_loss": 101.05146789550781, "distillation_loss": 3.7227227687835693, "epoch": 3.9, "learning_rate": 3.3882783882783884e-05, "loss": 104.6805, "step": 4615, "task_loss": 2.319870710372925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9507491101757917, "compression/movement_sparsity/importance_threshold": -0.0003503264141259321, "compression/movement_sparsity/linear_layer_sparsity": 0.893982976772103, "compression/movement_sparsity/model_sparsity": 0.8632719113066627, "compression_loss": 101.06298065185547, "distillation_loss": 3.5966248512268066, "epoch": 3.9, "learning_rate": 3.387808772424157e-05, "loss": 105.0071, "step": 4616, "task_loss": 1.7825926542282104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9508625043518383, "compression/movement_sparsity/importance_threshold": -0.00034951983022016037, "compression/movement_sparsity/linear_layer_sparsity": 0.8940524588969172, "compression/movement_sparsity/model_sparsity": 0.8633390065067376, "compression_loss": 101.07438659667969, "distillation_loss": 5.1295013427734375, "epoch": 3.9, "learning_rate": 3.387339156569926e-05, "loss": 105.8537, "step": 4617, "task_loss": 2.6002728939056396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.950975724343317, "compression/movement_sparsity/importance_threshold": -0.00034871448530631854, "compression/movement_sparsity/linear_layer_sparsity": 0.8940299222200854, "compression/movement_sparsity/model_sparsity": 0.8633172440340858, "compression_loss": 101.0858383178711, "distillation_loss": 5.532937049865723, "epoch": 3.9, "learning_rate": 3.386869540715695e-05, "loss": 106.2084, "step": 4618, "task_loss": 3.6131467819213867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9510887702841129, "compression/movement_sparsity/importance_threshold": -0.0003479103784320694, "compression/movement_sparsity/linear_layer_sparsity": 0.8940210148668615, "compression/movement_sparsity/model_sparsity": 0.8633086426758473, "compression_loss": 101.09725952148438, "distillation_loss": 4.998741149902344, "epoch": 3.9, "learning_rate": 3.3863999248614636e-05, "loss": 105.4243, "step": 4619, "task_loss": 2.799506664276123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9512016423081112, "compression/movement_sparsity/importance_threshold": -0.000347107508645075, "compression/movement_sparsity/linear_layer_sparsity": 0.8940969956630371, "compression/movement_sparsity/model_sparsity": 0.8633820132979301, "compression_loss": 101.10865783691406, "distillation_loss": 3.444766044616699, "epoch": 3.9, "learning_rate": 3.385930309007232e-05, "loss": 105.2898, "step": 4620, "task_loss": 1.9429491758346558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.951314340549197, "compression/movement_sparsity/importance_threshold": -0.0003463058749929972, "compression/movement_sparsity/linear_layer_sparsity": 0.8941654165369317, "compression/movement_sparsity/model_sparsity": 0.8634480837043192, "compression_loss": 101.12001037597656, "distillation_loss": 5.144679069519043, "epoch": 3.91, "learning_rate": 3.385460693153001e-05, "loss": 105.5513, "step": 4621, "task_loss": 2.9868202209472656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9514268651412552, "compression/movement_sparsity/importance_threshold": -0.00034550547652349804, "compression/movement_sparsity/linear_layer_sparsity": 0.8942891774728243, "compression/movement_sparsity/model_sparsity": 0.8635675930713308, "compression_loss": 101.13138580322266, "distillation_loss": 5.343113899230957, "epoch": 3.91, "learning_rate": 3.3849910772987695e-05, "loss": 105.9166, "step": 4622, "task_loss": 2.8850460052490234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9515392162181713, "compression/movement_sparsity/importance_threshold": -0.0003447063122842386, "compression/movement_sparsity/linear_layer_sparsity": 0.8943306974245324, "compression/movement_sparsity/model_sparsity": 0.8636076866849673, "compression_loss": 101.14269256591797, "distillation_loss": 4.109256267547607, "epoch": 3.91, "learning_rate": 3.384521461444539e-05, "loss": 105.5254, "step": 4623, "task_loss": 2.4072413444519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9516513939138304, "compression/movement_sparsity/importance_threshold": -0.0003439083813228808, "compression/movement_sparsity/linear_layer_sparsity": 0.8944197709567723, "compression/movement_sparsity/model_sparsity": 0.8636937002673526, "compression_loss": 101.15403747558594, "distillation_loss": 4.41938591003418, "epoch": 3.91, "learning_rate": 3.3840518455903075e-05, "loss": 105.1059, "step": 4624, "task_loss": 2.1995387077331543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9517633983621173, "compression/movement_sparsity/importance_threshold": -0.0003431116826870884, "compression/movement_sparsity/linear_layer_sparsity": 0.8944368344406592, "compression/movement_sparsity/model_sparsity": 0.8637101775680746, "compression_loss": 101.16531372070312, "distillation_loss": 4.7625837326049805, "epoch": 3.91, "learning_rate": 3.383582229736076e-05, "loss": 104.8897, "step": 4625, "task_loss": 3.341954469680786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9518752296969175, "compression/movement_sparsity/importance_threshold": -0.00034231621542452073, "compression/movement_sparsity/linear_layer_sparsity": 0.8944619109651974, "compression/movement_sparsity/model_sparsity": 0.8637343926368505, "compression_loss": 101.17649841308594, "distillation_loss": 5.239082336425781, "epoch": 3.91, "learning_rate": 3.383112613881845e-05, "loss": 105.5515, "step": 4626, "task_loss": 2.3394267559051514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9519868880521161, "compression/movement_sparsity/importance_threshold": -0.0003415219785828415, "compression/movement_sparsity/linear_layer_sparsity": 0.8945750116952236, "compression/movement_sparsity/model_sparsity": 0.8638436080088617, "compression_loss": 101.18775939941406, "distillation_loss": 6.027646064758301, "epoch": 3.91, "learning_rate": 3.3826429980276134e-05, "loss": 106.2365, "step": 4627, "task_loss": 3.691848039627075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.952098373561598, "compression/movement_sparsity/importance_threshold": -0.00034072897120971096, "compression/movement_sparsity/linear_layer_sparsity": 0.8946805525029686, "compression/movement_sparsity/model_sparsity": 0.8639455231651791, "compression_loss": 101.19902038574219, "distillation_loss": 4.133080959320068, "epoch": 3.91, "learning_rate": 3.382173382173383e-05, "loss": 104.735, "step": 4628, "task_loss": 2.731036901473999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9522096863592486, "compression/movement_sparsity/importance_threshold": -0.0003399371923527919, "compression/movement_sparsity/linear_layer_sparsity": 0.8947141428831988, "compression/movement_sparsity/model_sparsity": 0.8639779596125123, "compression_loss": 101.2102279663086, "distillation_loss": 6.085507392883301, "epoch": 3.91, "learning_rate": 3.3817037663191506e-05, "loss": 105.9603, "step": 4629, "task_loss": 2.5308289527893066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.952320826578953, "compression/movement_sparsity/importance_threshold": -0.00033914664105974546, "compression/movement_sparsity/linear_layer_sparsity": 0.8947708423003073, "compression/movement_sparsity/model_sparsity": 0.8640327112302154, "compression_loss": 101.22146606445312, "distillation_loss": 4.936126708984375, "epoch": 3.91, "learning_rate": 3.38123415046492e-05, "loss": 105.961, "step": 4630, "task_loss": 2.7776665687561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9524317943545961, "compression/movement_sparsity/importance_threshold": -0.0003383573163782353, "compression/movement_sparsity/linear_layer_sparsity": 0.8948654128738274, "compression/movement_sparsity/model_sparsity": 0.864124033013602, "compression_loss": 101.23263549804688, "distillation_loss": 4.175732612609863, "epoch": 3.91, "learning_rate": 3.3807645346106886e-05, "loss": 105.1168, "step": 4631, "task_loss": 1.9007903337478638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9525425898200633, "compression/movement_sparsity/importance_threshold": -0.0003375692173559217, "compression/movement_sparsity/linear_layer_sparsity": 0.8949443627877445, "compression/movement_sparsity/model_sparsity": 0.8642002707550975, "compression_loss": 101.24385070800781, "distillation_loss": 4.522026062011719, "epoch": 3.91, "learning_rate": 3.380294918756457e-05, "loss": 105.4772, "step": 4632, "task_loss": 2.8768599033355713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9526532131092398, "compression/movement_sparsity/importance_threshold": -0.00033678234304046657, "compression/movement_sparsity/linear_layer_sparsity": 0.8950332932299727, "compression/movement_sparsity/model_sparsity": 0.8642861461630532, "compression_loss": 101.25502014160156, "distillation_loss": 5.933039665222168, "epoch": 3.92, "learning_rate": 3.379825302902226e-05, "loss": 105.6031, "step": 4633, "task_loss": 2.587129831314087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9527636643560105, "compression/movement_sparsity/importance_threshold": -0.0003359966924795319, "compression/movement_sparsity/linear_layer_sparsity": 0.8951373315925957, "compression/movement_sparsity/model_sparsity": 0.8643866104878606, "compression_loss": 101.26614379882812, "distillation_loss": 5.678989410400391, "epoch": 3.92, "learning_rate": 3.3793556870479945e-05, "loss": 106.0472, "step": 4634, "task_loss": 2.5053799152374268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9528739436942606, "compression/movement_sparsity/importance_threshold": -0.0003352122647207797, "compression/movement_sparsity/linear_layer_sparsity": 0.8952668995981269, "compression/movement_sparsity/model_sparsity": 0.8645117274338041, "compression_loss": 101.27729034423828, "distillation_loss": 3.75211501121521, "epoch": 3.92, "learning_rate": 3.378886071193764e-05, "loss": 106.2011, "step": 4635, "task_loss": 2.4246912002563477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9529840512578753, "compression/movement_sparsity/importance_threshold": -0.00033442905881187184, "compression/movement_sparsity/linear_layer_sparsity": 0.895281041660943, "compression/movement_sparsity/model_sparsity": 0.8645253836732565, "compression_loss": 101.28834533691406, "distillation_loss": 4.342446327209473, "epoch": 3.92, "learning_rate": 3.3784164553395324e-05, "loss": 105.9424, "step": 4636, "task_loss": 2.0181853771209717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9530939871807399, "compression/movement_sparsity/importance_threshold": -0.0003336470738004695, "compression/movement_sparsity/linear_layer_sparsity": 0.8953574994238243, "compression/movement_sparsity/model_sparsity": 0.864599214876771, "compression_loss": 101.29949188232422, "distillation_loss": 4.876706123352051, "epoch": 3.92, "learning_rate": 3.377946839485302e-05, "loss": 105.4748, "step": 4637, "task_loss": 2.7973194122314453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9532037515967392, "compression/movement_sparsity/importance_threshold": -0.00033286630873423546, "compression/movement_sparsity/linear_layer_sparsity": 0.8954113251165325, "compression/movement_sparsity/model_sparsity": 0.8646511914913476, "compression_loss": 101.310546875, "distillation_loss": 3.7658932209014893, "epoch": 3.92, "learning_rate": 3.37747722363107e-05, "loss": 105.9875, "step": 4638, "task_loss": 2.706292152404785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9533133446397585, "compression/movement_sparsity/importance_threshold": -0.0003320867626608317, "compression/movement_sparsity/linear_layer_sparsity": 0.8954210790856586, "compression/movement_sparsity/model_sparsity": 0.8646606103816276, "compression_loss": 101.32159423828125, "distillation_loss": 7.673901081085205, "epoch": 3.92, "learning_rate": 3.377007607776838e-05, "loss": 106.8003, "step": 4639, "task_loss": 3.233397960662842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.953422766443683, "compression/movement_sparsity/importance_threshold": -0.00033130843462791937, "compression/movement_sparsity/linear_layer_sparsity": 0.8955465809500261, "compression/movement_sparsity/model_sparsity": 0.8647818008708651, "compression_loss": 101.3326416015625, "distillation_loss": 5.169644832611084, "epoch": 3.92, "learning_rate": 3.3765379919226076e-05, "loss": 105.3514, "step": 4640, "task_loss": 2.9978318214416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9535320171423978, "compression/movement_sparsity/importance_threshold": -0.00033053132368316036, "compression/movement_sparsity/linear_layer_sparsity": 0.8956379439224521, "compression/movement_sparsity/model_sparsity": 0.8648700252441228, "compression_loss": 101.34368896484375, "distillation_loss": 3.6914279460906982, "epoch": 3.92, "learning_rate": 3.376068376068376e-05, "loss": 105.1013, "step": 4641, "task_loss": 1.3731656074523926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.953641096869788, "compression/movement_sparsity/importance_threshold": -0.0003297554288742167, "compression/movement_sparsity/linear_layer_sparsity": 0.8956719516485495, "compression/movement_sparsity/model_sparsity": 0.8649028647002088, "compression_loss": 101.35469818115234, "distillation_loss": 6.178752422332764, "epoch": 3.92, "learning_rate": 3.375598760214145e-05, "loss": 106.1429, "step": 4642, "task_loss": 2.0787277221679688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9537500057597388, "compression/movement_sparsity/importance_threshold": -0.00032898074924875113, "compression/movement_sparsity/linear_layer_sparsity": 0.8956854378821457, "compression/movement_sparsity/model_sparsity": 0.8649158876401924, "compression_loss": 101.36573028564453, "distillation_loss": 3.525041103363037, "epoch": 3.92, "learning_rate": 3.3751291443599136e-05, "loss": 105.7979, "step": 4643, "task_loss": 1.9520769119262695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9538587439461352, "compression/movement_sparsity/importance_threshold": -0.00032820728385442397, "compression/movement_sparsity/linear_layer_sparsity": 0.8958114763340568, "compression/movement_sparsity/model_sparsity": 0.8650375962835408, "compression_loss": 101.376708984375, "distillation_loss": 4.217642784118652, "epoch": 3.93, "learning_rate": 3.374659528505683e-05, "loss": 105.5028, "step": 4644, "task_loss": 1.9497880935668945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9539673115628625, "compression/movement_sparsity/importance_threshold": -0.0003274350317388989, "compression/movement_sparsity/linear_layer_sparsity": 0.8958183088821121, "compression/movement_sparsity/model_sparsity": 0.8650441941125511, "compression_loss": 101.38763427734375, "distillation_loss": 4.203622817993164, "epoch": 3.93, "learning_rate": 3.3741899126514515e-05, "loss": 105.6532, "step": 4645, "task_loss": 2.5632643699645996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9540757087438059, "compression/movement_sparsity/importance_threshold": -0.00032666399194983525, "compression/movement_sparsity/linear_layer_sparsity": 0.8959433337797742, "compression/movement_sparsity/model_sparsity": 0.8651649240203568, "compression_loss": 101.39852905273438, "distillation_loss": 3.510265350341797, "epoch": 3.93, "learning_rate": 3.37372029679722e-05, "loss": 105.7806, "step": 4646, "task_loss": 2.2066001892089844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9541839356228503, "compression/movement_sparsity/importance_threshold": -0.00032589416353489675, "compression/movement_sparsity/linear_layer_sparsity": 0.8959421055905077, "compression/movement_sparsity/model_sparsity": 0.86516373802317, "compression_loss": 101.40943908691406, "distillation_loss": 5.680228233337402, "epoch": 3.93, "learning_rate": 3.373250680942989e-05, "loss": 105.9219, "step": 4647, "task_loss": 2.647474765777588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9542919923338811, "compression/movement_sparsity/importance_threshold": -0.0003251255455417445, "compression/movement_sparsity/linear_layer_sparsity": 0.8960166554865671, "compression/movement_sparsity/model_sparsity": 0.8652357269009574, "compression_loss": 101.42034149169922, "distillation_loss": 4.967499732971191, "epoch": 3.93, "learning_rate": 3.3727810650887574e-05, "loss": 106.442, "step": 4648, "task_loss": 3.546116352081299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9543998790107833, "compression/movement_sparsity/importance_threshold": -0.00032435813701804043, "compression/movement_sparsity/linear_layer_sparsity": 0.8960167628040758, "compression/movement_sparsity/model_sparsity": 0.8652358305317797, "compression_loss": 101.43122863769531, "distillation_loss": 4.016011714935303, "epoch": 3.93, "learning_rate": 3.372311449234527e-05, "loss": 105.8154, "step": 4649, "task_loss": 1.6877933740615845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9545075957874419, "compression/movement_sparsity/importance_threshold": -0.00032359193701144654, "compression/movement_sparsity/linear_layer_sparsity": 0.8959811453153474, "compression/movement_sparsity/model_sparsity": 0.8652014366133614, "compression_loss": 101.44207763671875, "distillation_loss": 6.991661071777344, "epoch": 3.93, "learning_rate": 3.3718418333802953e-05, "loss": 106.465, "step": 4650, "task_loss": 3.5328798294067383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9546151427977424, "compression/movement_sparsity/importance_threshold": -0.0003228269445696248, "compression/movement_sparsity/linear_layer_sparsity": 0.8961150417937306, "compression/movement_sparsity/model_sparsity": 0.8653307333357982, "compression_loss": 101.45287322998047, "distillation_loss": 2.3432047367095947, "epoch": 3.93, "learning_rate": 3.371372217526064e-05, "loss": 105.218, "step": 4651, "task_loss": 1.9799916744232178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9547225201755696, "compression/movement_sparsity/importance_threshold": -0.000322063158740238, "compression/movement_sparsity/linear_layer_sparsity": 0.8962362151852462, "compression/movement_sparsity/model_sparsity": 0.8654477440485423, "compression_loss": 101.46366882324219, "distillation_loss": 5.0961151123046875, "epoch": 3.93, "learning_rate": 3.3709026016718326e-05, "loss": 105.7614, "step": 4652, "task_loss": 2.0324363708496094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9548297280548088, "compression/movement_sparsity/importance_threshold": -0.00032130057857094555, "compression/movement_sparsity/linear_layer_sparsity": 0.8963380356526889, "compression/movement_sparsity/model_sparsity": 0.8655460666696919, "compression_loss": 101.47452545166016, "distillation_loss": 6.552958965301514, "epoch": 3.93, "learning_rate": 3.370432985817601e-05, "loss": 106.2819, "step": 4653, "task_loss": 2.9931423664093018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9549367665693451, "compression/movement_sparsity/importance_threshold": -0.000320539203109412, "compression/movement_sparsity/linear_layer_sparsity": 0.8963350903832829, "compression/movement_sparsity/model_sparsity": 0.8655432225793507, "compression_loss": 101.48523712158203, "distillation_loss": 4.911007404327393, "epoch": 3.93, "learning_rate": 3.3699633699633706e-05, "loss": 106.414, "step": 4654, "task_loss": 2.8711748123168945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9550436358530636, "compression/movement_sparsity/importance_threshold": -0.00031977903140329757, "compression/movement_sparsity/linear_layer_sparsity": 0.896454999813029, "compression/movement_sparsity/model_sparsity": 0.8656590127513005, "compression_loss": 101.49601745605469, "distillation_loss": 3.521719455718994, "epoch": 3.93, "learning_rate": 3.3694937541091385e-05, "loss": 105.3559, "step": 4655, "task_loss": 1.6323217153549194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9551503360398497, "compression/movement_sparsity/importance_threshold": -0.00031902006250026337, "compression/movement_sparsity/linear_layer_sparsity": 0.896500156635866, "compression/movement_sparsity/model_sparsity": 0.8657026182983545, "compression_loss": 101.50678253173828, "distillation_loss": 4.058036804199219, "epoch": 3.94, "learning_rate": 3.369024138254908e-05, "loss": 105.6801, "step": 4656, "task_loss": 2.283012866973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9552568672635882, "compression/movement_sparsity/importance_threshold": -0.0003182622954479731, "compression/movement_sparsity/linear_layer_sparsity": 0.8965144417886939, "compression/movement_sparsity/model_sparsity": 0.8657164127122363, "compression_loss": 101.51750946044922, "distillation_loss": 2.400212287902832, "epoch": 3.94, "learning_rate": 3.3685545224006765e-05, "loss": 105.3307, "step": 4657, "task_loss": 2.1639575958251953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9553632296581643, "compression/movement_sparsity/importance_threshold": -0.0003175057292940887, "compression/movement_sparsity/linear_layer_sparsity": 0.8964933717844813, "compression/movement_sparsity/model_sparsity": 0.8656960665274874, "compression_loss": 101.52815246582031, "distillation_loss": 5.014033794403076, "epoch": 3.94, "learning_rate": 3.368084906546445e-05, "loss": 105.0713, "step": 4658, "task_loss": 3.057401657104492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9554694233574633, "compression/movement_sparsity/importance_threshold": -0.00031675036308627044, "compression/movement_sparsity/linear_layer_sparsity": 0.8965848063019131, "compression/movement_sparsity/model_sparsity": 0.86578435998796, "compression_loss": 101.5388412475586, "distillation_loss": 4.049889087677002, "epoch": 3.94, "learning_rate": 3.367615290692214e-05, "loss": 105.9984, "step": 4659, "task_loss": 2.4681780338287354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9555754484953703, "compression/movement_sparsity/importance_threshold": -0.00031599619587218027, "compression/movement_sparsity/linear_layer_sparsity": 0.896572727120098, "compression/movement_sparsity/model_sparsity": 0.8657726957632, "compression_loss": 101.54946899414062, "distillation_loss": 4.576541423797607, "epoch": 3.94, "learning_rate": 3.3671456748379824e-05, "loss": 105.7474, "step": 4660, "task_loss": 2.448488235473633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9556813052057704, "compression/movement_sparsity/importance_threshold": -0.000315243226699481, "compression/movement_sparsity/linear_layer_sparsity": 0.8966292953713625, "compression/movement_sparsity/model_sparsity": 0.8658273207210093, "compression_loss": 101.56011199951172, "distillation_loss": 6.420583724975586, "epoch": 3.94, "learning_rate": 3.366676058983752e-05, "loss": 105.9007, "step": 4661, "task_loss": 3.5526461601257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9557869936225487, "compression/movement_sparsity/importance_threshold": -0.00031449145461583464, "compression/movement_sparsity/linear_layer_sparsity": 0.8967396535428324, "compression/movement_sparsity/model_sparsity": 0.8659338877497879, "compression_loss": 101.5706787109375, "distillation_loss": 1.7050888538360596, "epoch": 3.94, "learning_rate": 3.36620644312952e-05, "loss": 105.8374, "step": 4662, "task_loss": 0.8676210641860962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9558925138795905, "compression/movement_sparsity/importance_threshold": -0.0003137408786689014, "compression/movement_sparsity/linear_layer_sparsity": 0.8968752313288522, "compression/movement_sparsity/model_sparsity": 0.8660648080217718, "compression_loss": 101.5811996459961, "distillation_loss": 3.9838080406188965, "epoch": 3.94, "learning_rate": 3.365736827275289e-05, "loss": 106.0388, "step": 4663, "task_loss": 2.3845458030700684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9559978661107807, "compression/movement_sparsity/importance_threshold": -0.0003129914979063458, "compression/movement_sparsity/linear_layer_sparsity": 0.8968879663398872, "compression/movement_sparsity/model_sparsity": 0.8660771055460004, "compression_loss": 101.59178924560547, "distillation_loss": 3.9829397201538086, "epoch": 3.94, "learning_rate": 3.3652672114210576e-05, "loss": 105.5975, "step": 4664, "task_loss": 1.4955649375915527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9561030504500045, "compression/movement_sparsity/importance_threshold": -0.00031224331137582814, "compression/movement_sparsity/linear_layer_sparsity": 0.8968976129915046, "compression/movement_sparsity/model_sparsity": 0.8660864208054582, "compression_loss": 101.60232543945312, "distillation_loss": 4.316474914550781, "epoch": 3.94, "learning_rate": 3.364797595566826e-05, "loss": 105.9533, "step": 4665, "task_loss": 3.3049535751342773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9562080670311472, "compression/movement_sparsity/importance_threshold": -0.0003114963181250095, "compression/movement_sparsity/linear_layer_sparsity": 0.8968852595538339, "compression/movement_sparsity/model_sparsity": 0.866074491746375, "compression_loss": 101.61284637451172, "distillation_loss": 3.953522205352783, "epoch": 3.94, "learning_rate": 3.3643279797125955e-05, "loss": 106.1561, "step": 4666, "task_loss": 1.7240630388259888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.956312915988094, "compression/movement_sparsity/importance_threshold": -0.0003107505172015518, "compression/movement_sparsity/linear_layer_sparsity": 0.8970077565279572, "compression/movement_sparsity/model_sparsity": 0.8661927805725923, "compression_loss": 101.62342071533203, "distillation_loss": 3.9583499431610107, "epoch": 3.94, "learning_rate": 3.363858363858364e-05, "loss": 105.7258, "step": 4667, "task_loss": 2.603484869003296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9564175974547297, "compression/movement_sparsity/importance_threshold": -0.0003100059076531188, "compression/movement_sparsity/linear_layer_sparsity": 0.8969253366812581, "compression/movement_sparsity/model_sparsity": 0.8661131921011804, "compression_loss": 101.6338882446289, "distillation_loss": 3.9954495429992676, "epoch": 3.95, "learning_rate": 3.363388748004133e-05, "loss": 106.0716, "step": 4668, "task_loss": 2.4270598888397217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9565221115649396, "compression/movement_sparsity/importance_threshold": -0.0003092624885273716, "compression/movement_sparsity/linear_layer_sparsity": 0.8970243191968034, "compression/movement_sparsity/model_sparsity": 0.866208774262811, "compression_loss": 101.64441680908203, "distillation_loss": 4.004854202270508, "epoch": 3.95, "learning_rate": 3.3629191321499014e-05, "loss": 105.3939, "step": 4669, "task_loss": 3.004072904586792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.956626458452609, "compression/movement_sparsity/importance_threshold": -0.00030852025887197036, "compression/movement_sparsity/linear_layer_sparsity": 0.8970781210411763, "compression/movement_sparsity/model_sparsity": 0.8662607278483159, "compression_loss": 101.65482330322266, "distillation_loss": 5.338413238525391, "epoch": 3.95, "learning_rate": 3.362449516295671e-05, "loss": 105.7176, "step": 4670, "task_loss": 1.9578741788864136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9567306382516227, "compression/movement_sparsity/importance_threshold": -0.0003077792177345797, "compression/movement_sparsity/linear_layer_sparsity": 0.8971634742331138, "compression/movement_sparsity/model_sparsity": 0.8663431488955332, "compression_loss": 101.66529083251953, "distillation_loss": 4.703983783721924, "epoch": 3.95, "learning_rate": 3.3619799004414394e-05, "loss": 106.378, "step": 4671, "task_loss": 3.031550168991089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9568346510958662, "compression/movement_sparsity/importance_threshold": -0.000307039364162859, "compression/movement_sparsity/linear_layer_sparsity": 0.8972228685121081, "compression/movement_sparsity/model_sparsity": 0.8664005027983258, "compression_loss": 101.67573547363281, "distillation_loss": 4.291777610778809, "epoch": 3.95, "learning_rate": 3.361510284587207e-05, "loss": 105.6906, "step": 4672, "task_loss": 1.9564180374145508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9569384971192245, "compression/movement_sparsity/importance_threshold": -0.00030630069720447104, "compression/movement_sparsity/linear_layer_sparsity": 0.8972651158380419, "compression/movement_sparsity/model_sparsity": 0.8664412987986458, "compression_loss": 101.68608856201172, "distillation_loss": 4.722893714904785, "epoch": 3.95, "learning_rate": 3.3610406687329766e-05, "loss": 105.907, "step": 4673, "task_loss": 2.9461374282836914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9570421764555826, "compression/movement_sparsity/importance_threshold": -0.0003055632159070778, "compression/movement_sparsity/linear_layer_sparsity": 0.8972869370648155, "compression/movement_sparsity/model_sparsity": 0.8664623703991499, "compression_loss": 101.69654846191406, "distillation_loss": 4.460615634918213, "epoch": 3.95, "learning_rate": 3.360571052878745e-05, "loss": 106.0708, "step": 4674, "task_loss": 2.384246826171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9571456892388259, "compression/movement_sparsity/importance_threshold": -0.0003048269193183413, "compression/movement_sparsity/linear_layer_sparsity": 0.8973191800141029, "compression/movement_sparsity/model_sparsity": 0.8664935057039383, "compression_loss": 101.70687866210938, "distillation_loss": 4.669441223144531, "epoch": 3.95, "learning_rate": 3.3601014370245146e-05, "loss": 105.9401, "step": 4675, "task_loss": 2.6913909912109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9572490356028394, "compression/movement_sparsity/importance_threshold": -0.00030409180648592344, "compression/movement_sparsity/linear_layer_sparsity": 0.8973175940998074, "compression/movement_sparsity/model_sparsity": 0.8664919742706777, "compression_loss": 101.71723937988281, "distillation_loss": 6.4756622314453125, "epoch": 3.95, "learning_rate": 3.3596318211702825e-05, "loss": 106.6908, "step": 4676, "task_loss": 3.041940927505493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9573522156815082, "compression/movement_sparsity/importance_threshold": -0.00030335787645748534, "compression/movement_sparsity/linear_layer_sparsity": 0.8973418359326111, "compression/movement_sparsity/model_sparsity": 0.866515383321948, "compression_loss": 101.72760009765625, "distillation_loss": 3.1870815753936768, "epoch": 3.95, "learning_rate": 3.359162205316052e-05, "loss": 105.3746, "step": 4677, "task_loss": 2.288619041442871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9574552296087175, "compression/movement_sparsity/importance_threshold": -0.00030262512828068897, "compression/movement_sparsity/linear_layer_sparsity": 0.8973591021273478, "compression/movement_sparsity/model_sparsity": 0.8665320563697785, "compression_loss": 101.7379379272461, "distillation_loss": 4.127124786376953, "epoch": 3.95, "learning_rate": 3.3586925894618205e-05, "loss": 105.4711, "step": 4678, "task_loss": 3.0640594959259033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9575580775183525, "compression/movement_sparsity/importance_threshold": -0.0003018935610031963, "compression/movement_sparsity/linear_layer_sparsity": 0.8973715867308626, "compression/movement_sparsity/model_sparsity": 0.8665441120887554, "compression_loss": 101.74826049804688, "distillation_loss": 4.934437274932861, "epoch": 3.95, "learning_rate": 3.358222973607589e-05, "loss": 105.7181, "step": 4679, "task_loss": 2.4243452548980713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9576607595442982, "compression/movement_sparsity/importance_threshold": -0.000301163173672671, "compression/movement_sparsity/linear_layer_sparsity": 0.8974363707336283, "compression/movement_sparsity/model_sparsity": 0.866606670561727, "compression_loss": 101.758544921875, "distillation_loss": 4.0313720703125, "epoch": 3.96, "learning_rate": 3.357753357753358e-05, "loss": 105.9691, "step": 4680, "task_loss": 2.5609755516052246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9577632758204399, "compression/movement_sparsity/importance_threshold": -0.0003004339653367716, "compression/movement_sparsity/linear_layer_sparsity": 0.8975187190353214, "compression/movement_sparsity/model_sparsity": 0.8666861899459242, "compression_loss": 101.76884460449219, "distillation_loss": 6.23555850982666, "epoch": 3.96, "learning_rate": 3.3572837418991264e-05, "loss": 106.5766, "step": 4681, "task_loss": 3.0011849403381348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9578656264806625, "compression/movement_sparsity/importance_threshold": -0.0002997059350431626, "compression/movement_sparsity/linear_layer_sparsity": 0.8975450475974615, "compression/movement_sparsity/model_sparsity": 0.8667116140409585, "compression_loss": 101.77910614013672, "distillation_loss": 4.0646820068359375, "epoch": 3.96, "learning_rate": 3.356814126044896e-05, "loss": 106.1809, "step": 4682, "task_loss": 1.874554991722107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9579678116588514, "compression/movement_sparsity/importance_threshold": -0.0002989790818395052, "compression/movement_sparsity/linear_layer_sparsity": 0.8976004115077948, "compression/movement_sparsity/model_sparsity": 0.8667650760306526, "compression_loss": 101.7893295288086, "distillation_loss": 4.366635322570801, "epoch": 3.96, "learning_rate": 3.356344510190664e-05, "loss": 106.441, "step": 4683, "task_loss": 1.8499318361282349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9580698314888917, "compression/movement_sparsity/importance_threshold": -0.0002982534047734596, "compression/movement_sparsity/linear_layer_sparsity": 0.8976551434372434, "compression/movement_sparsity/model_sparsity": 0.8668179277499495, "compression_loss": 101.7995834350586, "distillation_loss": 3.6836118698120117, "epoch": 3.96, "learning_rate": 3.355874894336433e-05, "loss": 105.8485, "step": 4684, "task_loss": 2.9389684200286865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9581716861046683, "compression/movement_sparsity/importance_threshold": -0.00029752890289269033, "compression/movement_sparsity/linear_layer_sparsity": 0.897714919289602, "compression/movement_sparsity/model_sparsity": 0.8668756501178876, "compression_loss": 101.80977630615234, "distillation_loss": 3.5335116386413574, "epoch": 3.96, "learning_rate": 3.3554052784822016e-05, "loss": 105.8275, "step": 4685, "task_loss": 2.2646472454071045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9582733756400666, "compression/movement_sparsity/importance_threshold": -0.00029680557524485677, "compression/movement_sparsity/linear_layer_sparsity": 0.8977484500489941, "compression/movement_sparsity/model_sparsity": 0.8669080289925418, "compression_loss": 101.8199462890625, "distillation_loss": 4.514709949493408, "epoch": 3.96, "learning_rate": 3.35493566262797e-05, "loss": 106.0954, "step": 4686, "task_loss": 3.3270363807678223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9583749002289718, "compression/movement_sparsity/importance_threshold": -0.0002960834208776226, "compression/movement_sparsity/linear_layer_sparsity": 0.8978267679820263, "compression/movement_sparsity/model_sparsity": 0.8669836564636403, "compression_loss": 101.83015441894531, "distillation_loss": 4.895815372467041, "epoch": 3.96, "learning_rate": 3.3544660467737395e-05, "loss": 106.4439, "step": 4687, "task_loss": 2.853973627090454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9584762600052686, "compression/movement_sparsity/importance_threshold": -0.00029536243883864985, "compression/movement_sparsity/linear_layer_sparsity": 0.8977559741987723, "compression/movement_sparsity/model_sparsity": 0.8669152946646281, "compression_loss": 101.84028625488281, "distillation_loss": 3.668389320373535, "epoch": 3.96, "learning_rate": 3.353996430919508e-05, "loss": 106.1546, "step": 4688, "task_loss": 2.217041254043579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9585774551028428, "compression/movement_sparsity/importance_threshold": -0.0002946426281755978, "compression/movement_sparsity/linear_layer_sparsity": 0.8978508667248184, "compression/movement_sparsity/model_sparsity": 0.8670069273404811, "compression_loss": 101.850341796875, "distillation_loss": 4.374927043914795, "epoch": 3.96, "learning_rate": 3.353526815065277e-05, "loss": 106.0479, "step": 4689, "task_loss": 1.6821836233139038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9586784856555789, "compression/movement_sparsity/importance_threshold": -0.0002939239879361311, "compression/movement_sparsity/linear_layer_sparsity": 0.8978676559528498, "compression/movement_sparsity/model_sparsity": 0.8670231398068798, "compression_loss": 101.8604507446289, "distillation_loss": 3.5482447147369385, "epoch": 3.96, "learning_rate": 3.3530571992110454e-05, "loss": 105.1057, "step": 4690, "task_loss": 2.695138931274414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9587793517973624, "compression/movement_sparsity/importance_threshold": -0.0002932065171679099, "compression/movement_sparsity/linear_layer_sparsity": 0.8979243792182935, "compression/movement_sparsity/model_sparsity": 0.8670779144536545, "compression_loss": 101.87054443359375, "distillation_loss": 3.116692543029785, "epoch": 3.96, "learning_rate": 3.352587583356814e-05, "loss": 105.6255, "step": 4691, "task_loss": 2.6246190071105957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9588800536620783, "compression/movement_sparsity/importance_threshold": -0.00029249021491859796, "compression/movement_sparsity/linear_layer_sparsity": 0.8980122484096023, "compression/movement_sparsity/model_sparsity": 0.8671627650679244, "compression_loss": 101.88057708740234, "distillation_loss": 5.669735431671143, "epoch": 3.97, "learning_rate": 3.3521179675025834e-05, "loss": 106.01, "step": 4692, "task_loss": 3.571932792663574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9589805913836119, "compression/movement_sparsity/importance_threshold": -0.0002917750802358546, "compression/movement_sparsity/linear_layer_sparsity": 0.8981074271156717, "compression/movement_sparsity/model_sparsity": 0.8672546740926365, "compression_loss": 101.89060974121094, "distillation_loss": 5.201666355133057, "epoch": 3.97, "learning_rate": 3.3516483516483513e-05, "loss": 106.6069, "step": 4693, "task_loss": 2.8536124229431152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9590809650958482, "compression/movement_sparsity/importance_threshold": -0.00029106111216734357, "compression/movement_sparsity/linear_layer_sparsity": 0.8981193274349722, "compression/movement_sparsity/model_sparsity": 0.8672661655993594, "compression_loss": 101.90062713623047, "distillation_loss": 4.655300140380859, "epoch": 3.97, "learning_rate": 3.3511787357941207e-05, "loss": 106.6638, "step": 4694, "task_loss": 1.7739357948303223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9591811749326724, "compression/movement_sparsity/importance_threshold": -0.00029034830976072505, "compression/movement_sparsity/linear_layer_sparsity": 0.8982408347031817, "compression/movement_sparsity/model_sparsity": 0.8673834987191058, "compression_loss": 101.9106674194336, "distillation_loss": 3.761857748031616, "epoch": 3.97, "learning_rate": 3.350709119939889e-05, "loss": 105.2894, "step": 4695, "task_loss": 3.412358283996582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9592812210279696, "compression/movement_sparsity/importance_threshold": -0.00028963667206366277, "compression/movement_sparsity/linear_layer_sparsity": 0.8983034842799406, "compression/movement_sparsity/model_sparsity": 0.8674439960901703, "compression_loss": 101.92066955566406, "distillation_loss": 3.9954962730407715, "epoch": 3.97, "learning_rate": 3.350239504085658e-05, "loss": 106.1419, "step": 4696, "task_loss": 1.7709993124008179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.959381103515625, "compression/movement_sparsity/importance_threshold": -0.0002889261981238178, "compression/movement_sparsity/linear_layer_sparsity": 0.8983921523904808, "compression/movement_sparsity/model_sparsity": 0.8675296181783385, "compression_loss": 101.9306411743164, "distillation_loss": 4.311000347137451, "epoch": 3.97, "learning_rate": 3.349769888231427e-05, "loss": 105.894, "step": 4697, "task_loss": 3.210758686065674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9594808225295237, "compression/movement_sparsity/importance_threshold": -0.0002882168869888513, "compression/movement_sparsity/linear_layer_sparsity": 0.8985602831541465, "compression/movement_sparsity/model_sparsity": 0.8676919731330415, "compression_loss": 101.94061279296875, "distillation_loss": 5.086540222167969, "epoch": 3.97, "learning_rate": 3.349300272377195e-05, "loss": 106.0711, "step": 4698, "task_loss": 2.801870822906494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9595803782035508, "compression/movement_sparsity/importance_threshold": -0.0002875087377064269, "compression/movement_sparsity/linear_layer_sparsity": 0.8985966280171006, "compression/movement_sparsity/model_sparsity": 0.8677270694381433, "compression_loss": 101.9505844116211, "distillation_loss": 4.194717884063721, "epoch": 3.97, "learning_rate": 3.3488306565229645e-05, "loss": 106.302, "step": 4699, "task_loss": 2.6817238330841064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9596797706715915, "compression/movement_sparsity/importance_threshold": -0.00028680174932420397, "compression/movement_sparsity/linear_layer_sparsity": 0.8986849741751147, "compression/movement_sparsity/model_sparsity": 0.867812380633845, "compression_loss": 101.96051025390625, "distillation_loss": 4.414302349090576, "epoch": 3.97, "learning_rate": 3.348361040668733e-05, "loss": 106.4008, "step": 4700, "task_loss": 1.9028892517089844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9597790000675309, "compression/movement_sparsity/importance_threshold": -0.0002860959208898471, "compression/movement_sparsity/linear_layer_sparsity": 0.8986962067410277, "compression/movement_sparsity/model_sparsity": 0.8678232273265635, "compression_loss": 101.97045135498047, "distillation_loss": 3.559798240661621, "epoch": 3.97, "learning_rate": 3.3478914248145025e-05, "loss": 105.5703, "step": 4701, "task_loss": 2.446654796600342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9598780665252543, "compression/movement_sparsity/importance_threshold": -0.0002853912514510148, "compression/movement_sparsity/linear_layer_sparsity": 0.8987250513025389, "compression/movement_sparsity/model_sparsity": 0.8678510809886505, "compression_loss": 101.98028564453125, "distillation_loss": 5.190405368804932, "epoch": 3.97, "learning_rate": 3.3474218089602704e-05, "loss": 106.549, "step": 4702, "task_loss": 3.281277656555176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9599769701786466, "compression/movement_sparsity/importance_threshold": -0.0002846877400553725, "compression/movement_sparsity/linear_layer_sparsity": 0.8989065967547949, "compression/movement_sparsity/model_sparsity": 0.8680263897961223, "compression_loss": 101.99020385742188, "distillation_loss": 3.6502928733825684, "epoch": 3.97, "learning_rate": 3.346952193106039e-05, "loss": 106.4279, "step": 4703, "task_loss": 2.1916751861572266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9600757111615931, "compression/movement_sparsity/importance_threshold": -0.00028398538575057954, "compression/movement_sparsity/linear_layer_sparsity": 0.8989663368346505, "compression/movement_sparsity/model_sparsity": 0.868084077620453, "compression_loss": 102.00003814697266, "distillation_loss": 3.730922222137451, "epoch": 3.98, "learning_rate": 3.3464825772518084e-05, "loss": 106.7314, "step": 4704, "task_loss": 1.3901718854904175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9601742896079788, "compression/movement_sparsity/importance_threshold": -0.00028328418758429876, "compression/movement_sparsity/linear_layer_sparsity": 0.8989492375782607, "compression/movement_sparsity/model_sparsity": 0.8680675657761237, "compression_loss": 102.00988006591797, "distillation_loss": 3.6987392902374268, "epoch": 3.98, "learning_rate": 3.346012961397577e-05, "loss": 106.2843, "step": 4705, "task_loss": 2.234402894973755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.960272705651689, "compression/movement_sparsity/importance_threshold": -0.0002825841446041921, "compression/movement_sparsity/linear_layer_sparsity": 0.8990653074260282, "compression/movement_sparsity/model_sparsity": 0.8681796482675477, "compression_loss": 102.0196762084961, "distillation_loss": 4.854969024658203, "epoch": 3.98, "learning_rate": 3.3455433455433456e-05, "loss": 106.7314, "step": 4706, "task_loss": 2.7969188690185547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9603709594266087, "compression/movement_sparsity/importance_threshold": -0.0002818852558579216, "compression/movement_sparsity/linear_layer_sparsity": 0.8990593334180427, "compression/movement_sparsity/model_sparsity": 0.8681738794851146, "compression_loss": 102.02948760986328, "distillation_loss": 5.347637176513672, "epoch": 3.98, "learning_rate": 3.345073729689114e-05, "loss": 107.015, "step": 4707, "task_loss": 2.8133673667907715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9604690510666232, "compression/movement_sparsity/importance_threshold": -0.0002811875203931483, "compression/movement_sparsity/linear_layer_sparsity": 0.8990946766509154, "compression/movement_sparsity/model_sparsity": 0.8682080085692097, "compression_loss": 102.03929138183594, "distillation_loss": 3.5451388359069824, "epoch": 3.98, "learning_rate": 3.3446041138348836e-05, "loss": 105.7937, "step": 4708, "task_loss": 1.8816108703613281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9605669807056174, "compression/movement_sparsity/importance_threshold": -0.00028049093725753416, "compression/movement_sparsity/linear_layer_sparsity": 0.8990957617501701, "compression/movement_sparsity/model_sparsity": 0.868209056391967, "compression_loss": 102.04901885986328, "distillation_loss": 4.2058281898498535, "epoch": 3.98, "learning_rate": 3.344134497980652e-05, "loss": 105.9605, "step": 4709, "task_loss": 1.6387507915496826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9606647484774767, "compression/movement_sparsity/importance_threshold": -0.00027979550549874204, "compression/movement_sparsity/linear_layer_sparsity": 0.8991563126734251, "compression/movement_sparsity/model_sparsity": 0.8682675272047317, "compression_loss": 102.05876159667969, "distillation_loss": 4.247766971588135, "epoch": 3.98, "learning_rate": 3.34366488212642e-05, "loss": 106.3702, "step": 4710, "task_loss": 1.9613587856292725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9607623545160862, "compression/movement_sparsity/importance_threshold": -0.00027910122416443216, "compression/movement_sparsity/linear_layer_sparsity": 0.8992678036408204, "compression/movement_sparsity/model_sparsity": 0.8683751881144106, "compression_loss": 102.06848907470703, "distillation_loss": 5.011955261230469, "epoch": 3.98, "learning_rate": 3.3431952662721895e-05, "loss": 107.094, "step": 4711, "task_loss": 2.2600784301757812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9608597989553309, "compression/movement_sparsity/importance_threshold": -0.00027840809230226735, "compression/movement_sparsity/linear_layer_sparsity": 0.8992975782874072, "compression/movement_sparsity/model_sparsity": 0.8684039399102895, "compression_loss": 102.07816314697266, "distillation_loss": 3.3448750972747803, "epoch": 3.98, "learning_rate": 3.342725650417958e-05, "loss": 105.8876, "step": 4712, "task_loss": 2.6370134353637695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.960957081929096, "compression/movement_sparsity/importance_threshold": -0.00027771610895990957, "compression/movement_sparsity/linear_layer_sparsity": 0.8993281399290579, "compression/movement_sparsity/model_sparsity": 0.868433451665531, "compression_loss": 102.08787536621094, "distillation_loss": 4.7758283615112305, "epoch": 3.98, "learning_rate": 3.3422560345637274e-05, "loss": 106.6043, "step": 4713, "task_loss": 2.57338547706604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9610542035712667, "compression/movement_sparsity/importance_threshold": -0.0002770252731850208, "compression/movement_sparsity/linear_layer_sparsity": 0.8994203852898889, "compression/movement_sparsity/model_sparsity": 0.8685225281144375, "compression_loss": 102.09754943847656, "distillation_loss": 4.614522933959961, "epoch": 3.98, "learning_rate": 3.341786418709496e-05, "loss": 106.5029, "step": 4714, "task_loss": 2.737485647201538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.961151164015728, "compression/movement_sparsity/importance_threshold": -0.000276335584025263, "compression/movement_sparsity/linear_layer_sparsity": 0.8994649936010147, "compression/movement_sparsity/model_sparsity": 0.8685656039928449, "compression_loss": 102.1072006225586, "distillation_loss": 4.064095973968506, "epoch": 3.99, "learning_rate": 3.341316802855265e-05, "loss": 106.0881, "step": 4715, "task_loss": 2.7686190605163574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9612479633963652, "compression/movement_sparsity/importance_threshold": -0.00027564704052829726, "compression/movement_sparsity/linear_layer_sparsity": 0.8995077298178216, "compression/movement_sparsity/model_sparsity": 0.8686068720891326, "compression_loss": 102.1168212890625, "distillation_loss": 3.6361286640167236, "epoch": 3.99, "learning_rate": 3.340847187001033e-05, "loss": 106.0324, "step": 4716, "task_loss": 1.2767177820205688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9613446018470634, "compression/movement_sparsity/importance_threshold": -0.00027495964174178555, "compression/movement_sparsity/linear_layer_sparsity": 0.8995320431956312, "compression/movement_sparsity/model_sparsity": 0.8686303502276176, "compression_loss": 102.12641143798828, "distillation_loss": 5.1119890213012695, "epoch": 3.99, "learning_rate": 3.340377571146802e-05, "loss": 107.2166, "step": 4717, "task_loss": 2.126174211502075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9614410795017077, "compression/movement_sparsity/importance_threshold": -0.0002742733867133907, "compression/movement_sparsity/linear_layer_sparsity": 0.8995365624551651, "compression/movement_sparsity/model_sparsity": 0.8686347142366837, "compression_loss": 102.13600158691406, "distillation_loss": 5.101768493652344, "epoch": 3.99, "learning_rate": 3.339907955292571e-05, "loss": 106.3145, "step": 4718, "task_loss": 3.0180108547210693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9615373964941832, "compression/movement_sparsity/importance_threshold": -0.00027358827449077295, "compression/movement_sparsity/linear_layer_sparsity": 0.8996757294156433, "compression/movement_sparsity/model_sparsity": 0.8687691003839418, "compression_loss": 102.14562225341797, "distillation_loss": 3.7479641437530518, "epoch": 3.99, "learning_rate": 3.339438339438339e-05, "loss": 107.3138, "step": 4719, "task_loss": 3.178091526031494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9616335529583752, "compression/movement_sparsity/importance_threshold": -0.0002729043041215951, "compression/movement_sparsity/linear_layer_sparsity": 0.8996997327650943, "compression/movement_sparsity/model_sparsity": 0.8687922791444962, "compression_loss": 102.15518951416016, "distillation_loss": 4.933334827423096, "epoch": 3.99, "learning_rate": 3.3389687235841085e-05, "loss": 106.8682, "step": 4720, "task_loss": 2.630211591720581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9617295490281687, "compression/movement_sparsity/importance_threshold": -0.0002722214746535192, "compression/movement_sparsity/linear_layer_sparsity": 0.8997246185029504, "compression/movement_sparsity/model_sparsity": 0.8688163099806995, "compression_loss": 102.16476440429688, "distillation_loss": 3.553328275680542, "epoch": 3.99, "learning_rate": 3.338499107729877e-05, "loss": 106.7637, "step": 4721, "task_loss": 1.2293570041656494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9618253848374488, "compression/movement_sparsity/importance_threshold": -0.00027153978513420624, "compression/movement_sparsity/linear_layer_sparsity": 0.8998070383496494, "compression/movement_sparsity/model_sparsity": 0.8688958984521113, "compression_loss": 102.17436218261719, "distillation_loss": 4.837475776672363, "epoch": 3.99, "learning_rate": 3.338029491875646e-05, "loss": 106.6123, "step": 4722, "task_loss": 3.2022149562835693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9619210605201008, "compression/movement_sparsity/importance_threshold": -0.0002708592346113191, "compression/movement_sparsity/linear_layer_sparsity": 0.8997659357438086, "compression/movement_sparsity/model_sparsity": 0.8688562078472275, "compression_loss": 102.18386840820312, "distillation_loss": 4.21602725982666, "epoch": 3.99, "learning_rate": 3.3375598760214144e-05, "loss": 106.1003, "step": 4723, "task_loss": 2.403801679611206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9620165762100098, "compression/movement_sparsity/importance_threshold": -0.0002701798221325189, "compression/movement_sparsity/linear_layer_sparsity": 0.8997893309607101, "compression/movement_sparsity/model_sparsity": 0.8688787993664564, "compression_loss": 102.19341278076172, "distillation_loss": 5.929920196533203, "epoch": 3.99, "learning_rate": 3.337090260167183e-05, "loss": 107.0155, "step": 4724, "task_loss": 3.529221534729004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9621119320410608, "compression/movement_sparsity/importance_threshold": -0.00026950154674546846, "compression/movement_sparsity/linear_layer_sparsity": 0.8999433315857274, "compression/movement_sparsity/model_sparsity": 0.8690275095962429, "compression_loss": 102.20293426513672, "distillation_loss": 3.6021406650543213, "epoch": 3.99, "learning_rate": 3.3366206443129524e-05, "loss": 106.7657, "step": 4725, "task_loss": 2.322742462158203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9622071281471392, "compression/movement_sparsity/importance_threshold": -0.000268824407497828, "compression/movement_sparsity/linear_layer_sparsity": 0.9000573266283262, "compression/movement_sparsity/model_sparsity": 0.8691375885584387, "compression_loss": 102.21246337890625, "distillation_loss": 3.6554863452911377, "epoch": 3.99, "learning_rate": 3.336151028458721e-05, "loss": 106.8371, "step": 4726, "task_loss": 1.7232153415679932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9623021646621298, "compression/movement_sparsity/importance_threshold": -0.0002681484034372612, "compression/movement_sparsity/linear_layer_sparsity": 0.9000874113032714, "compression/movement_sparsity/model_sparsity": 0.8691666397322484, "compression_loss": 102.22199249267578, "distillation_loss": 4.970303058624268, "epoch": 4.0, "learning_rate": 3.3356814126044896e-05, "loss": 107.2184, "step": 4727, "task_loss": 2.0545434951782227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9623970417199179, "compression/movement_sparsity/importance_threshold": -0.0002674735336114292, "compression/movement_sparsity/linear_layer_sparsity": 0.9000520799945664, "compression/movement_sparsity/model_sparsity": 0.8691325221626891, "compression_loss": 102.23147583007812, "distillation_loss": 4.454118728637695, "epoch": 4.0, "learning_rate": 3.335211796750258e-05, "loss": 106.9423, "step": 4728, "task_loss": 3.8031768798828125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9624917594543888, "compression/movement_sparsity/importance_threshold": -0.00026679979706799306, "compression/movement_sparsity/linear_layer_sparsity": 0.9000883652366823, "compression/movement_sparsity/model_sparsity": 0.8691675608951119, "compression_loss": 102.24095153808594, "distillation_loss": 3.409276008605957, "epoch": 4.0, "learning_rate": 3.334742180896027e-05, "loss": 106.302, "step": 4729, "task_loss": 0.891934871673584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9625863179994274, "compression/movement_sparsity/importance_threshold": -0.0002661271928546165, "compression/movement_sparsity/linear_layer_sparsity": 0.9001846528903419, "compression/movement_sparsity/model_sparsity": 0.8692605407716528, "compression_loss": 102.25040435791016, "distillation_loss": 4.983661651611328, "epoch": 4.0, "learning_rate": 3.334272565041796e-05, "loss": 107.244, "step": 4730, "task_loss": 3.7979609966278076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9626807174889191, "compression/movement_sparsity/importance_threshold": -0.00026545572001895803, "compression/movement_sparsity/linear_layer_sparsity": 0.900306172082719, "compression/movement_sparsity/model_sparsity": 0.869377885405935, "compression_loss": 102.25987243652344, "distillation_loss": 4.497596740722656, "epoch": 4.0, "learning_rate": 3.333802949187565e-05, "loss": 105.9257, "step": 4731, "task_loss": 2.768622636795044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9627749580567488, "compression/movement_sparsity/importance_threshold": -0.0002647853776086822, "compression/movement_sparsity/linear_layer_sparsity": 0.9002878088645597, "compression/movement_sparsity/model_sparsity": 0.8693601530208114, "compression_loss": 102.26931762695312, "distillation_loss": 3.8900651931762695, "epoch": 4.0, "learning_rate": 3.3333333333333335e-05, "loss": 106.4646, "step": 4732, "task_loss": 3.0103354454040527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9629629629629629, "compression/movement_sparsity/importance_threshold": -0.0002634480802549263, "compression/movement_sparsity/linear_layer_sparsity": 0.9004133584255978, "compression/movement_sparsity/model_sparsity": 0.8694813895681921, "compression_loss": 102.28870391845703, "distillation_loss": 2.9190683364868164, "epoch": 4.0, "learning_rate": 3.332863717479102e-05, "loss": 186.2149, "step": 4733, "task_loss": 1.829244613647461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9630567275691178, "compression/movement_sparsity/importance_threshold": -0.00026278112340676757, "compression/movement_sparsity/linear_layer_sparsity": 0.9004285498151658, "compression/movement_sparsity/model_sparsity": 0.8694960590867944, "compression_loss": 102.298095703125, "distillation_loss": 4.785494804382324, "epoch": 4.0, "learning_rate": 3.332394101624871e-05, "loss": 106.2784, "step": 4734, "task_loss": 2.9157838821411133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9631503337891512, "compression/movement_sparsity/importance_threshold": -0.00026211529317463934, "compression/movement_sparsity/linear_layer_sparsity": 0.9004919029178152, "compression/movement_sparsity/model_sparsity": 0.8695572358154707, "compression_loss": 102.30741119384766, "distillation_loss": 4.2503252029418945, "epoch": 4.0, "learning_rate": 3.33192448577064e-05, "loss": 106.5491, "step": 4735, "task_loss": 3.216562032699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9632437817569484, "compression/movement_sparsity/importance_threshold": -0.00026145058860620185, "compression/movement_sparsity/linear_layer_sparsity": 0.9005420678910593, "compression/movement_sparsity/model_sparsity": 0.8696056774675583, "compression_loss": 102.31678009033203, "distillation_loss": 5.184864044189453, "epoch": 4.0, "learning_rate": 3.331454869916408e-05, "loss": 106.9435, "step": 4736, "task_loss": 3.6527364253997803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9633370716063946, "compression/movement_sparsity/importance_threshold": -0.00026078700874911705, "compression/movement_sparsity/linear_layer_sparsity": 0.9005924117268179, "compression/movement_sparsity/model_sparsity": 0.869654291837683, "compression_loss": 102.32599639892578, "distillation_loss": 4.254373550415039, "epoch": 4.0, "learning_rate": 3.3309852540621773e-05, "loss": 106.4887, "step": 4737, "task_loss": 2.662250518798828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9634302034713748, "compression/movement_sparsity/importance_threshold": -0.0002601245526510478, "compression/movement_sparsity/linear_layer_sparsity": 0.9005852810745717, "compression/movement_sparsity/model_sparsity": 0.8696474061452778, "compression_loss": 102.33535766601562, "distillation_loss": 3.5354549884796143, "epoch": 4.01, "learning_rate": 3.330515638207946e-05, "loss": 106.4871, "step": 4738, "task_loss": 1.9330536127090454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9635231774857742, "compression/movement_sparsity/importance_threshold": -0.000259463219359656, "compression/movement_sparsity/linear_layer_sparsity": 0.9006951145826656, "compression/movement_sparsity/model_sparsity": 0.8697534665344814, "compression_loss": 102.34465026855469, "distillation_loss": 3.850691318511963, "epoch": 4.01, "learning_rate": 3.330046022353715e-05, "loss": 107.1539, "step": 4739, "task_loss": 2.2513232231140137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9636159937834781, "compression/movement_sparsity/importance_threshold": -0.00025880300792260197, "compression/movement_sparsity/linear_layer_sparsity": 0.9006986441362858, "compression/movement_sparsity/model_sparsity": 0.8697568748370765, "compression_loss": 102.35398864746094, "distillation_loss": 3.675567626953125, "epoch": 4.01, "learning_rate": 3.329576406499483e-05, "loss": 106.2712, "step": 4740, "task_loss": 2.532457113265991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9637086524983712, "compression/movement_sparsity/importance_threshold": -0.00025814391738754935, "compression/movement_sparsity/linear_layer_sparsity": 0.9008171584384186, "compression/movement_sparsity/model_sparsity": 0.8698713178083385, "compression_loss": 102.36324310302734, "distillation_loss": 4.10349702835083, "epoch": 4.01, "learning_rate": 3.3291067906452526e-05, "loss": 106.2888, "step": 4741, "task_loss": 1.265009880065918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9638011537643392, "compression/movement_sparsity/importance_threshold": -0.0002574859468021584, "compression/movement_sparsity/linear_layer_sparsity": 0.9009101430976431, "compression/movement_sparsity/model_sparsity": 0.8699611081584643, "compression_loss": 102.37247467041016, "distillation_loss": 5.355730056762695, "epoch": 4.01, "learning_rate": 3.328637174791021e-05, "loss": 107.2227, "step": 4742, "task_loss": 3.0425591468811035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9638934977152669, "compression/movement_sparsity/importance_threshold": -0.0002568290952140919, "compression/movement_sparsity/linear_layer_sparsity": 0.9009550495129598, "compression/movement_sparsity/model_sparsity": 0.8700044719002665, "compression_loss": 102.3816909790039, "distillation_loss": 3.1084837913513184, "epoch": 4.01, "learning_rate": 3.32816755893679e-05, "loss": 105.853, "step": 4743, "task_loss": 1.3634649515151978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9639856844850394, "compression/movement_sparsity/importance_threshold": -0.0002561733616710119, "compression/movement_sparsity/linear_layer_sparsity": 0.901013525631046, "compression/movement_sparsity/model_sparsity": 0.870060939183803, "compression_loss": 102.39088439941406, "distillation_loss": 4.1299028396606445, "epoch": 4.01, "learning_rate": 3.327697943082559e-05, "loss": 106.8346, "step": 4744, "task_loss": 2.318711519241333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9640777142075421, "compression/movement_sparsity/importance_threshold": -0.0002555187452205794, "compression/movement_sparsity/linear_layer_sparsity": 0.9010424298133953, "compression/movement_sparsity/model_sparsity": 0.8700888504185689, "compression_loss": 102.40007019042969, "distillation_loss": 4.769927978515625, "epoch": 4.01, "learning_rate": 3.327228327228327e-05, "loss": 106.776, "step": 4745, "task_loss": 3.13997220993042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9641695870166599, "compression/movement_sparsity/importance_threshold": -0.0002548652449104573, "compression/movement_sparsity/linear_layer_sparsity": 0.9011312767864502, "compression/movement_sparsity/model_sparsity": 0.8701746452247741, "compression_loss": 102.4093017578125, "distillation_loss": 4.151796340942383, "epoch": 4.01, "learning_rate": 3.3267587113740964e-05, "loss": 107.0295, "step": 4746, "task_loss": 2.591557264328003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9642613030462781, "compression/movement_sparsity/importance_threshold": -0.00025421285978830584, "compression/movement_sparsity/linear_layer_sparsity": 0.9010356807345135, "compression/movement_sparsity/model_sparsity": 0.8700823331913092, "compression_loss": 102.41847229003906, "distillation_loss": 4.960771083831787, "epoch": 4.01, "learning_rate": 3.326289095519865e-05, "loss": 107.2625, "step": 4747, "task_loss": 2.8601677417755127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9643528624302817, "compression/movement_sparsity/importance_threshold": -0.0002535615889017887, "compression/movement_sparsity/linear_layer_sparsity": 0.9010463767128828, "compression/movement_sparsity/model_sparsity": 0.870092661729917, "compression_loss": 102.42767333984375, "distillation_loss": 5.119833946228027, "epoch": 4.01, "learning_rate": 3.325819479665634e-05, "loss": 107.3052, "step": 4748, "task_loss": 3.3765525817871094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964444265302556, "compression/movement_sparsity/importance_threshold": -0.00025291143129856693, "compression/movement_sparsity/linear_layer_sparsity": 0.9010796689889221, "compression/movement_sparsity/model_sparsity": 0.8701248103138552, "compression_loss": 102.43682861328125, "distillation_loss": 5.133080005645752, "epoch": 4.01, "learning_rate": 3.325349863811402e-05, "loss": 107.0258, "step": 4749, "task_loss": 2.9687411785125732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964535511796986, "compression/movement_sparsity/importance_threshold": -0.0002522623860263017, "compression/movement_sparsity/linear_layer_sparsity": 0.9011302870805364, "compression/movement_sparsity/model_sparsity": 0.8701736895183031, "compression_loss": 102.44601440429688, "distillation_loss": 4.7289605140686035, "epoch": 4.02, "learning_rate": 3.324880247957171e-05, "loss": 106.5546, "step": 4750, "task_loss": 2.7802562713623047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964626602047457, "compression/movement_sparsity/importance_threshold": -0.00025161445213265667, "compression/movement_sparsity/linear_layer_sparsity": 0.9011175043728308, "compression/movement_sparsity/model_sparsity": 0.8701613459359313, "compression_loss": 102.45523071289062, "distillation_loss": 5.018883228302002, "epoch": 4.02, "learning_rate": 3.32441063210294e-05, "loss": 106.482, "step": 4751, "task_loss": 2.119647741317749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9647175361878539, "compression/movement_sparsity/importance_threshold": -0.0002509676286652912, "compression/movement_sparsity/linear_layer_sparsity": 0.9011239791958571, "compression/movement_sparsity/model_sparsity": 0.8701675983288678, "compression_loss": 102.4643325805664, "distillation_loss": 3.9745264053344727, "epoch": 4.02, "learning_rate": 3.323941016248709e-05, "loss": 107.2646, "step": 4752, "task_loss": 1.7498211860656738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964808314352062, "compression/movement_sparsity/importance_threshold": -0.0002503219146718699, "compression/movement_sparsity/linear_layer_sparsity": 0.9012124565197152, "compression/movement_sparsity/model_sparsity": 0.8702530361844633, "compression_loss": 102.47344970703125, "distillation_loss": 3.2695021629333496, "epoch": 4.02, "learning_rate": 3.3234714003944775e-05, "loss": 106.5152, "step": 4753, "task_loss": 1.7278215885162354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9648989366739665, "compression/movement_sparsity/importance_threshold": -0.0002496773092000521, "compression/movement_sparsity/linear_layer_sparsity": 0.9012407167970121, "compression/movement_sparsity/model_sparsity": 0.8702803256342964, "compression_loss": 102.48247528076172, "distillation_loss": 3.5609443187713623, "epoch": 4.02, "learning_rate": 3.323001784540246e-05, "loss": 107.07, "step": 4754, "task_loss": 1.8468430042266846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9649894032874524, "compression/movement_sparsity/importance_threshold": -0.00024903381129750147, "compression/movement_sparsity/linear_layer_sparsity": 0.9012988471142369, "compression/movement_sparsity/model_sparsity": 0.8703364589962947, "compression_loss": 102.49156951904297, "distillation_loss": 5.16026496887207, "epoch": 4.02, "learning_rate": 3.322532168686015e-05, "loss": 106.2366, "step": 4755, "task_loss": 2.630140542984009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.965079714326405, "compression/movement_sparsity/importance_threshold": -0.0002483914200118783, "compression/movement_sparsity/linear_layer_sparsity": 0.9013138596412905, "compression/movement_sparsity/model_sparsity": 0.8703509557968601, "compression_loss": 102.5005874633789, "distillation_loss": 4.805792808532715, "epoch": 4.02, "learning_rate": 3.322062552831784e-05, "loss": 107.1155, "step": 4756, "task_loss": 2.4747583866119385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9651698699247092, "compression/movement_sparsity/importance_threshold": -0.0002477501343908471, "compression/movement_sparsity/linear_layer_sparsity": 0.901382614391879, "compression/movement_sparsity/model_sparsity": 0.8704173486102513, "compression_loss": 102.50962829589844, "distillation_loss": 5.212522983551025, "epoch": 4.02, "learning_rate": 3.321592936977552e-05, "loss": 107.1623, "step": 4757, "task_loss": 2.8482134342193604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9652598702162504, "compression/movement_sparsity/importance_threshold": -0.00024710995348206644, "compression/movement_sparsity/linear_layer_sparsity": 0.901453634734318, "compression/movement_sparsity/model_sparsity": 0.8704859291854437, "compression_loss": 102.51860046386719, "distillation_loss": 4.317405700683594, "epoch": 4.02, "learning_rate": 3.3211233211233214e-05, "loss": 107.4412, "step": 4758, "task_loss": 1.6333692073822021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9653497153349135, "compression/movement_sparsity/importance_threshold": -0.00024647087633319995, "compression/movement_sparsity/linear_layer_sparsity": 0.9014781269746421, "compression/movement_sparsity/model_sparsity": 0.8705095800419657, "compression_loss": 102.52759552001953, "distillation_loss": 3.158538341522217, "epoch": 4.02, "learning_rate": 3.32065370526909e-05, "loss": 106.6287, "step": 4759, "task_loss": 1.5984628200531006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9654394054145838, "compression/movement_sparsity/importance_threshold": -0.00024583290199190963, "compression/movement_sparsity/linear_layer_sparsity": 0.9015105010897735, "compression/movement_sparsity/model_sparsity": 0.8705408420066478, "compression_loss": 102.53653717041016, "distillation_loss": 2.8211328983306885, "epoch": 4.02, "learning_rate": 3.3201840894148586e-05, "loss": 106.2219, "step": 4760, "task_loss": 1.6519402265548706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9655289405891465, "compression/movement_sparsity/importance_threshold": -0.00024519602950585657, "compression/movement_sparsity/linear_layer_sparsity": 0.9016173058592879, "compression/movement_sparsity/model_sparsity": 0.8706439777037596, "compression_loss": 102.54547119140625, "distillation_loss": 4.506186485290527, "epoch": 4.02, "learning_rate": 3.319714473560628e-05, "loss": 106.9628, "step": 4761, "task_loss": 3.3729665279388428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9656183209924866, "compression/movement_sparsity/importance_threshold": -0.0002445602579227036, "compression/movement_sparsity/linear_layer_sparsity": 0.9016272744634315, "compression/movement_sparsity/model_sparsity": 0.8706536038556838, "compression_loss": 102.5543212890625, "distillation_loss": 5.024531364440918, "epoch": 4.03, "learning_rate": 3.319244857706396e-05, "loss": 107.894, "step": 4762, "task_loss": 2.1364972591400146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9657075467584892, "compression/movement_sparsity/importance_threshold": -0.00024392558629011095, "compression/movement_sparsity/linear_layer_sparsity": 0.9017295957459148, "compression/movement_sparsity/model_sparsity": 0.8707524100873367, "compression_loss": 102.56321716308594, "distillation_loss": 3.555490493774414, "epoch": 4.03, "learning_rate": 3.318775241852165e-05, "loss": 106.6424, "step": 4763, "task_loss": 1.384401798248291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9657966180210396, "compression/movement_sparsity/importance_threshold": -0.00024329201365574232, "compression/movement_sparsity/linear_layer_sparsity": 0.9018197305290744, "compression/movement_sparsity/model_sparsity": 0.8708394484634077, "compression_loss": 102.57205200195312, "distillation_loss": 4.983199119567871, "epoch": 4.03, "learning_rate": 3.318305625997934e-05, "loss": 107.832, "step": 4764, "task_loss": 1.9071258306503296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9658855349140228, "compression/movement_sparsity/importance_threshold": -0.00024265953906725795, "compression/movement_sparsity/linear_layer_sparsity": 0.9019036408967279, "compression/movement_sparsity/model_sparsity": 0.8709204762517938, "compression_loss": 102.58091735839844, "distillation_loss": 5.193002700805664, "epoch": 4.03, "learning_rate": 3.317836010143703e-05, "loss": 107.2247, "step": 4765, "task_loss": 3.0150513648986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9659742975713241, "compression/movement_sparsity/importance_threshold": -0.00024202816157232066, "compression/movement_sparsity/linear_layer_sparsity": 0.9019495370179583, "compression/movement_sparsity/model_sparsity": 0.870964795700067, "compression_loss": 102.58973693847656, "distillation_loss": 5.5270490646362305, "epoch": 4.03, "learning_rate": 3.317366394289471e-05, "loss": 107.2219, "step": 4766, "task_loss": 3.0254666805267334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9660629061268285, "compression/movement_sparsity/importance_threshold": -0.00024139788021859156, "compression/movement_sparsity/linear_layer_sparsity": 0.9020517629071007, "compression/movement_sparsity/model_sparsity": 0.8710635098154336, "compression_loss": 102.59857177734375, "distillation_loss": 5.466241836547852, "epoch": 4.03, "learning_rate": 3.31689677843524e-05, "loss": 106.8506, "step": 4767, "task_loss": 2.6275339126586914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9661513607144212, "compression/movement_sparsity/importance_threshold": -0.00024076869405373433, "compression/movement_sparsity/linear_layer_sparsity": 0.9021123138303555, "compression/movement_sparsity/model_sparsity": 0.8711219806281983, "compression_loss": 102.60741424560547, "distillation_loss": 5.616883277893066, "epoch": 4.03, "learning_rate": 3.316427162581009e-05, "loss": 106.5049, "step": 4768, "task_loss": 2.5249617099761963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9662396614679873, "compression/movement_sparsity/importance_threshold": -0.00024014060212540836, "compression/movement_sparsity/linear_layer_sparsity": 0.9021002227243727, "compression/movement_sparsity/model_sparsity": 0.8711103048889026, "compression_loss": 102.61621856689453, "distillation_loss": 5.288245677947998, "epoch": 4.03, "learning_rate": 3.315957546726778e-05, "loss": 106.8402, "step": 4769, "task_loss": 2.257072925567627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9663278085214121, "compression/movement_sparsity/importance_threshold": -0.00023951360348127646, "compression/movement_sparsity/linear_layer_sparsity": 0.9020923170012302, "compression/movement_sparsity/model_sparsity": 0.8711026707516708, "compression_loss": 102.62500762939453, "distillation_loss": 5.482517242431641, "epoch": 4.03, "learning_rate": 3.315487930872546e-05, "loss": 107.9208, "step": 4770, "task_loss": 3.110496997833252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9664158020085805, "compression/movement_sparsity/importance_threshold": -0.00023888769716900148, "compression/movement_sparsity/linear_layer_sparsity": 0.902123856424627, "compression/movement_sparsity/model_sparsity": 0.8711331266988475, "compression_loss": 102.6338119506836, "distillation_loss": 5.543405055999756, "epoch": 4.03, "learning_rate": 3.315018315018315e-05, "loss": 107.821, "step": 4771, "task_loss": 2.4873151779174805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9665036420633777, "compression/movement_sparsity/importance_threshold": -0.0002382628822362445, "compression/movement_sparsity/linear_layer_sparsity": 0.9021823802393839, "compression/movement_sparsity/model_sparsity": 0.871189640040527, "compression_loss": 102.6426010131836, "distillation_loss": 4.256648540496826, "epoch": 4.03, "learning_rate": 3.314548699164084e-05, "loss": 106.8174, "step": 4772, "task_loss": 2.028536558151245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9665913288196889, "compression/movement_sparsity/importance_threshold": -0.0002376391577306675, "compression/movement_sparsity/linear_layer_sparsity": 0.9022809931057324, "compression/movement_sparsity/model_sparsity": 0.8712848652515479, "compression_loss": 102.6513671875, "distillation_loss": 4.546026706695557, "epoch": 4.03, "learning_rate": 3.314079083309853e-05, "loss": 106.5162, "step": 4773, "task_loss": 2.7588484287261963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9666788624113993, "compression/movement_sparsity/importance_threshold": -0.00023701652269993156, "compression/movement_sparsity/linear_layer_sparsity": 0.9022749237044058, "compression/movement_sparsity/model_sparsity": 0.8712790043528285, "compression_loss": 102.66007995605469, "distillation_loss": 6.574051856994629, "epoch": 4.04, "learning_rate": 3.3136094674556215e-05, "loss": 107.4424, "step": 4774, "task_loss": 3.2350826263427734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.966766242972394, "compression/movement_sparsity/importance_threshold": -0.00023639497619169867, "compression/movement_sparsity/linear_layer_sparsity": 0.902314702727639, "compression/movement_sparsity/model_sparsity": 0.871317416844239, "compression_loss": 102.66877746582031, "distillation_loss": 4.020795822143555, "epoch": 4.04, "learning_rate": 3.31313985160139e-05, "loss": 106.3753, "step": 4775, "task_loss": 2.1316864490509033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.966853470636558, "compression/movement_sparsity/importance_threshold": -0.00023577451725363251, "compression/movement_sparsity/linear_layer_sparsity": 0.9024109307604604, "compression/movement_sparsity/model_sparsity": 0.871410339148101, "compression_loss": 102.67747497558594, "distillation_loss": 5.224225044250488, "epoch": 4.04, "learning_rate": 3.312670235747159e-05, "loss": 107.1369, "step": 4776, "task_loss": 2.667006254196167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9669405455377766, "compression/movement_sparsity/importance_threshold": -0.00023515514493339246, "compression/movement_sparsity/linear_layer_sparsity": 0.9024607380086754, "compression/movement_sparsity/model_sparsity": 0.8714584353641148, "compression_loss": 102.68614196777344, "distillation_loss": 5.827061653137207, "epoch": 4.04, "learning_rate": 3.312200619892928e-05, "loss": 107.0798, "step": 4777, "task_loss": 3.38424015045166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9670274678099349, "compression/movement_sparsity/importance_threshold": -0.00023453685827864307, "compression/movement_sparsity/linear_layer_sparsity": 0.9025097463376589, "compression/movement_sparsity/model_sparsity": 0.8715057601062304, "compression_loss": 102.69473266601562, "distillation_loss": 5.899693012237549, "epoch": 4.04, "learning_rate": 3.311731004038697e-05, "loss": 107.7298, "step": 4778, "task_loss": 3.567085027694702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9671142375869181, "compression/movement_sparsity/importance_threshold": -0.00023391965633704284, "compression/movement_sparsity/linear_layer_sparsity": 0.902630347369128, "compression/movement_sparsity/model_sparsity": 0.8716222181212563, "compression_loss": 102.7033462524414, "distillation_loss": 5.706391334533691, "epoch": 4.04, "learning_rate": 3.3112613881844654e-05, "loss": 107.8041, "step": 4779, "task_loss": 2.6814281940460205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9672008550026112, "compression/movement_sparsity/importance_threshold": -0.00023330353815625548, "compression/movement_sparsity/linear_layer_sparsity": 0.9026504515157621, "compression/movement_sparsity/model_sparsity": 0.871641631628606, "compression_loss": 102.71197509765625, "distillation_loss": 3.686361312866211, "epoch": 4.04, "learning_rate": 3.310791772330234e-05, "loss": 106.7174, "step": 4780, "task_loss": 2.1846609115600586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9672873201908995, "compression/movement_sparsity/importance_threshold": -0.00023268850278394294, "compression/movement_sparsity/linear_layer_sparsity": 0.9026535756476827, "compression/movement_sparsity/model_sparsity": 0.8716446484369841, "compression_loss": 102.72052001953125, "distillation_loss": 4.632335662841797, "epoch": 4.04, "learning_rate": 3.3103221564760027e-05, "loss": 106.8838, "step": 4781, "task_loss": 2.9034616947174072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9673736332856679, "compression/movement_sparsity/importance_threshold": -0.0002320745492677672, "compression/movement_sparsity/linear_layer_sparsity": 0.9026985774563404, "compression/movement_sparsity/model_sparsity": 0.8716881042950727, "compression_loss": 102.72909545898438, "distillation_loss": 6.185647964477539, "epoch": 4.04, "learning_rate": 3.309852540621772e-05, "loss": 108.0445, "step": 4782, "task_loss": 2.596698045730591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9674597944208019, "compression/movement_sparsity/importance_threshold": -0.00023146167665539022, "compression/movement_sparsity/linear_layer_sparsity": 0.902721936900739, "compression/movement_sparsity/model_sparsity": 0.8717106612706942, "compression_loss": 102.7376480102539, "distillation_loss": 3.898094654083252, "epoch": 4.04, "learning_rate": 3.30938292476754e-05, "loss": 106.6785, "step": 4783, "task_loss": 1.920641541481018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9675458037301863, "compression/movement_sparsity/importance_threshold": -0.0002308498839944731, "compression/movement_sparsity/linear_layer_sparsity": 0.9027866255101636, "compression/movement_sparsity/model_sparsity": 0.8717731276273796, "compression_loss": 102.74618530273438, "distillation_loss": 4.589359760284424, "epoch": 4.04, "learning_rate": 3.308913308913309e-05, "loss": 107.0361, "step": 4784, "task_loss": 2.5735082626342773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9676316613477064, "compression/movement_sparsity/importance_threshold": -0.0002302391703326778, "compression/movement_sparsity/linear_layer_sparsity": 0.9028557260616135, "compression/movement_sparsity/model_sparsity": 0.8718398543623089, "compression_loss": 102.75472259521484, "distillation_loss": 4.670665264129639, "epoch": 4.04, "learning_rate": 3.308443693059078e-05, "loss": 107.1349, "step": 4785, "task_loss": 3.2206523418426514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9677173674072472, "compression/movement_sparsity/importance_threshold": -0.00022962953471766714, "compression/movement_sparsity/linear_layer_sparsity": 0.9028924763462671, "compression/movement_sparsity/model_sparsity": 0.8718753421616278, "compression_loss": 102.76321411132812, "distillation_loss": 4.4252848625183105, "epoch": 4.05, "learning_rate": 3.3079740772048465e-05, "loss": 107.2103, "step": 4786, "task_loss": 2.9942381381988525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9678029220426941, "compression/movement_sparsity/importance_threshold": -0.00022902097619710137, "compression/movement_sparsity/linear_layer_sparsity": 0.9029059506556957, "compression/movement_sparsity/model_sparsity": 0.8718883535870755, "compression_loss": 102.77167510986328, "distillation_loss": 4.637772560119629, "epoch": 4.05, "learning_rate": 3.307504461350615e-05, "loss": 107.4327, "step": 4787, "task_loss": 2.7259020805358887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9678883253879321, "compression/movement_sparsity/importance_threshold": -0.00022841349381864332, "compression/movement_sparsity/linear_layer_sparsity": 0.9029459085414434, "compression/movement_sparsity/model_sparsity": 0.871926938796523, "compression_loss": 102.78015899658203, "distillation_loss": 4.240678787231445, "epoch": 4.05, "learning_rate": 3.307034845496384e-05, "loss": 106.722, "step": 4788, "task_loss": 2.0183002948760986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9679735775768464, "compression/movement_sparsity/importance_threshold": -0.0002278070866299541, "compression/movement_sparsity/linear_layer_sparsity": 0.9029095398301541, "compression/movement_sparsity/model_sparsity": 0.8718918194623497, "compression_loss": 102.78855895996094, "distillation_loss": 5.896967887878418, "epoch": 4.05, "learning_rate": 3.306565229642153e-05, "loss": 107.455, "step": 4789, "task_loss": 3.0924248695373535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968058678743322, "compression/movement_sparsity/importance_threshold": -0.00022720175367869737, "compression/movement_sparsity/linear_layer_sparsity": 0.90294012532014, "compression/movement_sparsity/model_sparsity": 0.8719213542466627, "compression_loss": 102.79707336425781, "distillation_loss": 4.53794527053833, "epoch": 4.05, "learning_rate": 3.306095613787922e-05, "loss": 107.4401, "step": 4790, "task_loss": 2.724515914916992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9681436290212441, "compression/movement_sparsity/importance_threshold": -0.0002265974940125334, "compression/movement_sparsity/linear_layer_sparsity": 0.9029535877054009, "compression/movement_sparsity/model_sparsity": 0.8719343541575747, "compression_loss": 102.80551147460938, "distillation_loss": 4.835347652435303, "epoch": 4.05, "learning_rate": 3.3056259979336904e-05, "loss": 107.0113, "step": 4791, "task_loss": 2.26074481010437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968228428544498, "compression/movement_sparsity/importance_threshold": -0.00022599430667912326, "compression/movement_sparsity/linear_layer_sparsity": 0.9029960019696817, "compression/movement_sparsity/model_sparsity": 0.8719753113613959, "compression_loss": 102.81392669677734, "distillation_loss": 4.936439037322998, "epoch": 4.05, "learning_rate": 3.305156382079459e-05, "loss": 107.5429, "step": 4792, "task_loss": 2.453190326690674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9683130774469686, "compression/movement_sparsity/importance_threshold": -0.00022539219072613068, "compression/movement_sparsity/linear_layer_sparsity": 0.9030544065427621, "compression/movement_sparsity/model_sparsity": 0.8720317095577176, "compression_loss": 102.82235717773438, "distillation_loss": 4.65719747543335, "epoch": 4.05, "learning_rate": 3.3046867662252276e-05, "loss": 107.7776, "step": 4793, "task_loss": 3.4132323265075684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968397575862541, "compression/movement_sparsity/importance_threshold": -0.00022479114520121846, "compression/movement_sparsity/linear_layer_sparsity": 0.9030512585625063, "compression/movement_sparsity/model_sparsity": 0.8720286697202678, "compression_loss": 102.83084106445312, "distillation_loss": 3.4845387935638428, "epoch": 4.05, "learning_rate": 3.304217150370997e-05, "loss": 107.3568, "step": 4794, "task_loss": 1.6136727333068848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9684819239251007, "compression/movement_sparsity/importance_threshold": -0.00022419116915204512, "compression/movement_sparsity/linear_layer_sparsity": 0.9031414410423363, "compression/movement_sparsity/model_sparsity": 0.872115754154482, "compression_loss": 102.83924865722656, "distillation_loss": 4.073892593383789, "epoch": 4.05, "learning_rate": 3.3037475345167656e-05, "loss": 107.2926, "step": 4795, "task_loss": 1.556467056274414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9685661217685325, "compression/movement_sparsity/importance_threshold": -0.00022359226162627435, "compression/movement_sparsity/linear_layer_sparsity": 0.9031993328762085, "compression/movement_sparsity/model_sparsity": 0.8721716572257644, "compression_loss": 102.84768676757812, "distillation_loss": 4.239499092102051, "epoch": 4.05, "learning_rate": 3.303277918662534e-05, "loss": 106.5626, "step": 4796, "task_loss": 3.5179688930511475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9686501695267218, "compression/movement_sparsity/importance_threshold": -0.00022299442167156812, "compression/movement_sparsity/linear_layer_sparsity": 0.9032432376614437, "compression/movement_sparsity/model_sparsity": 0.8722140537465599, "compression_loss": 102.85601806640625, "distillation_loss": 4.01783561706543, "epoch": 4.05, "learning_rate": 3.302808302808303e-05, "loss": 106.5951, "step": 4797, "task_loss": 1.726696491241455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9687340673335535, "compression/movement_sparsity/importance_threshold": -0.00022239764833558752, "compression/movement_sparsity/linear_layer_sparsity": 0.903360821878501, "compression/movement_sparsity/model_sparsity": 0.8723275985840299, "compression_loss": 102.86431884765625, "distillation_loss": 5.887246131896973, "epoch": 4.06, "learning_rate": 3.3023386869540715e-05, "loss": 107.2446, "step": 4798, "task_loss": 2.5339858531951904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9688178153229129, "compression/movement_sparsity/importance_threshold": -0.0002218019406659954, "compression/movement_sparsity/linear_layer_sparsity": 0.9034034030811287, "compression/movement_sparsity/model_sparsity": 0.8723687169913522, "compression_loss": 102.87263488769531, "distillation_loss": 3.5367941856384277, "epoch": 4.06, "learning_rate": 3.301869071099841e-05, "loss": 107.2771, "step": 4799, "task_loss": 2.3613638877868652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968901413628685, "compression/movement_sparsity/importance_threshold": -0.00022120729771045282, "compression/movement_sparsity/linear_layer_sparsity": 0.903464430971089, "compression/movement_sparsity/model_sparsity": 0.8724276483855486, "compression_loss": 102.88094329833984, "distillation_loss": 5.1044793128967285, "epoch": 4.06, "learning_rate": 3.301399455245609e-05, "loss": 107.1177, "step": 4800, "task_loss": 3.2936837673187256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9689848623847551, "compression/movement_sparsity/importance_threshold": -0.00022061371851662178, "compression/movement_sparsity/linear_layer_sparsity": 0.9034977828679664, "compression/movement_sparsity/model_sparsity": 0.872459854542166, "compression_loss": 102.88922882080078, "distillation_loss": 4.921917915344238, "epoch": 4.06, "learning_rate": 3.300929839391378e-05, "loss": 107.802, "step": 4801, "task_loss": 3.3596668243408203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9690681617250083, "compression/movement_sparsity/importance_threshold": -0.00022002120213216424, "compression/movement_sparsity/linear_layer_sparsity": 0.9035179227871034, "compression/movement_sparsity/model_sparsity": 0.872479302593123, "compression_loss": 102.89746856689453, "distillation_loss": 3.7951250076293945, "epoch": 4.06, "learning_rate": 3.300460223537147e-05, "loss": 107.0534, "step": 4802, "task_loss": 1.6182130575180054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9691513117833296, "compression/movement_sparsity/importance_threshold": -0.00021942974760474302, "compression/movement_sparsity/linear_layer_sparsity": 0.9035836368749447, "compression/movement_sparsity/model_sparsity": 0.8725427591998867, "compression_loss": 102.90571594238281, "distillation_loss": 3.6605305671691895, "epoch": 4.06, "learning_rate": 3.299990607682916e-05, "loss": 107.4268, "step": 4803, "task_loss": 2.1650774478912354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9692343126936044, "compression/movement_sparsity/importance_threshold": -0.00021883935398201836, "compression/movement_sparsity/linear_layer_sparsity": 0.9035713311339445, "compression/movement_sparsity/model_sparsity": 0.8725308761989468, "compression_loss": 102.91395568847656, "distillation_loss": 4.582568168640137, "epoch": 4.06, "learning_rate": 3.2995209918286846e-05, "loss": 106.8599, "step": 4804, "task_loss": 1.9324482679367065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9693171645897176, "compression/movement_sparsity/importance_threshold": -0.00021825002031165223, "compression/movement_sparsity/linear_layer_sparsity": 0.9035211423123651, "compression/movement_sparsity/model_sparsity": 0.8724824115177875, "compression_loss": 102.92224884033203, "distillation_loss": 5.110498905181885, "epoch": 4.06, "learning_rate": 3.2990513759744526e-05, "loss": 107.6983, "step": 4805, "task_loss": 2.8777363300323486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9693998676055544, "compression/movement_sparsity/importance_threshold": -0.00021766174564130745, "compression/movement_sparsity/linear_layer_sparsity": 0.9035910537072142, "compression/movement_sparsity/model_sparsity": 0.8725499212411509, "compression_loss": 102.93048095703125, "distillation_loss": 3.3560080528259277, "epoch": 4.06, "learning_rate": 3.298581760120222e-05, "loss": 107.5652, "step": 4806, "task_loss": 2.0163984298706055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.969482421875, "compression/movement_sparsity/importance_threshold": -0.000217074529018646, "compression/movement_sparsity/linear_layer_sparsity": 0.9036571255200845, "compression/movement_sparsity/model_sparsity": 0.8726137232839885, "compression_loss": 102.93868255615234, "distillation_loss": 4.140921115875244, "epoch": 4.06, "learning_rate": 3.2981121442659905e-05, "loss": 106.4937, "step": 4807, "task_loss": 1.935791254043579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9695648275319395, "compression/movement_sparsity/importance_threshold": -0.00021648836949132895, "compression/movement_sparsity/linear_layer_sparsity": 0.903762356299471, "compression/movement_sparsity/model_sparsity": 0.8727153390623753, "compression_loss": 102.94686889648438, "distillation_loss": 3.6986474990844727, "epoch": 4.06, "learning_rate": 3.29764252841176e-05, "loss": 106.5848, "step": 4808, "task_loss": 1.9722487926483154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9696470847102581, "compression/movement_sparsity/importance_threshold": -0.0002159032661070183, "compression/movement_sparsity/linear_layer_sparsity": 0.9037953623954871, "compression/movement_sparsity/model_sparsity": 0.8727472112974546, "compression_loss": 102.95500183105469, "distillation_loss": 4.228645324707031, "epoch": 4.07, "learning_rate": 3.297172912557528e-05, "loss": 107.433, "step": 4809, "task_loss": 3.2162859439849854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9697291935438409, "compression/movement_sparsity/importance_threshold": -0.00021531921791337512, "compression/movement_sparsity/linear_layer_sparsity": 0.9037311030560974, "compression/movement_sparsity/model_sparsity": 0.8726851594640578, "compression_loss": 102.963134765625, "distillation_loss": 5.794671535491943, "epoch": 4.07, "learning_rate": 3.296703296703297e-05, "loss": 108.2642, "step": 4810, "task_loss": 3.2034966945648193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.969811154166573, "compression/movement_sparsity/importance_threshold": -0.00021473622395806226, "compression/movement_sparsity/linear_layer_sparsity": 0.9037813514985149, "compression/movement_sparsity/model_sparsity": 0.872733681717896, "compression_loss": 102.97123718261719, "distillation_loss": 3.766575574874878, "epoch": 4.07, "learning_rate": 3.296233680849066e-05, "loss": 107.6929, "step": 4811, "task_loss": 2.023303747177124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9698929667123396, "compression/movement_sparsity/importance_threshold": -0.00021415428328874254, "compression/movement_sparsity/linear_layer_sparsity": 0.9038394698915722, "compression/movement_sparsity/model_sparsity": 0.8727898035653585, "compression_loss": 102.97930908203125, "distillation_loss": 4.461671829223633, "epoch": 4.07, "learning_rate": 3.2957640649948344e-05, "loss": 106.6472, "step": 4812, "task_loss": 3.094827890396118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9699746313150257, "compression/movement_sparsity/importance_threshold": -0.00021357339495307708, "compression/movement_sparsity/linear_layer_sparsity": 0.9038884424480526, "compression/movement_sparsity/model_sparsity": 0.8728370937638668, "compression_loss": 102.98737335205078, "distillation_loss": 4.97735071182251, "epoch": 4.07, "learning_rate": 3.295294449140603e-05, "loss": 107.6898, "step": 4813, "task_loss": 2.369351863861084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9700561481085167, "compression/movement_sparsity/importance_threshold": -0.00021299355799872608, "compression/movement_sparsity/linear_layer_sparsity": 0.9039166311803438, "compression/movement_sparsity/model_sparsity": 0.872864314126485, "compression_loss": 102.99542236328125, "distillation_loss": 4.380774974822998, "epoch": 4.07, "learning_rate": 3.2948248332863716e-05, "loss": 106.8658, "step": 4814, "task_loss": 2.597215414047241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9701375172266976, "compression/movement_sparsity/importance_threshold": -0.0002124147714733524, "compression/movement_sparsity/linear_layer_sparsity": 0.9039456784527048, "compression/movement_sparsity/model_sparsity": 0.8728923635356806, "compression_loss": 103.0034408569336, "distillation_loss": 3.8834736347198486, "epoch": 4.07, "learning_rate": 3.294355217432141e-05, "loss": 107.1064, "step": 4815, "task_loss": 1.82835054397583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9702187388034534, "compression/movement_sparsity/importance_threshold": -0.00021183703442461885, "compression/movement_sparsity/linear_layer_sparsity": 0.9040115714030605, "compression/movement_sparsity/model_sparsity": 0.8729559928604812, "compression_loss": 103.01138305664062, "distillation_loss": 4.399380683898926, "epoch": 4.07, "learning_rate": 3.2938856015779096e-05, "loss": 107.7377, "step": 4816, "task_loss": 1.6610209941864014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9702998129726695, "compression/movement_sparsity/importance_threshold": -0.00021126034590018654, "compression/movement_sparsity/linear_layer_sparsity": 0.9040387227327674, "compression/movement_sparsity/model_sparsity": 0.8729822114584853, "compression_loss": 103.01934814453125, "distillation_loss": 5.262960433959961, "epoch": 4.07, "learning_rate": 3.293415985723678e-05, "loss": 107.8615, "step": 4817, "task_loss": 2.677272081375122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9703807398682309, "compression/movement_sparsity/importance_threshold": -0.00021068470494771743, "compression/movement_sparsity/linear_layer_sparsity": 0.9041561042389749, "compression/movement_sparsity/model_sparsity": 0.8730955605488468, "compression_loss": 103.0272216796875, "distillation_loss": 4.218008995056152, "epoch": 4.07, "learning_rate": 3.292946369869447e-05, "loss": 107.689, "step": 4818, "task_loss": 2.4009897708892822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9704615196240227, "compression/movement_sparsity/importance_threshold": -0.0002101101106148735, "compression/movement_sparsity/linear_layer_sparsity": 0.904237343593078, "compression/movement_sparsity/model_sparsity": 0.873174009081215, "compression_loss": 103.03514099121094, "distillation_loss": 2.7961981296539307, "epoch": 4.07, "learning_rate": 3.2924767540152155e-05, "loss": 106.1696, "step": 4819, "task_loss": 2.109779119491577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9705421523739303, "compression/movement_sparsity/importance_threshold": -0.00020953656194931496, "compression/movement_sparsity/linear_layer_sparsity": 0.9042099060833478, "compression/movement_sparsity/model_sparsity": 0.8731475141343518, "compression_loss": 103.04300689697266, "distillation_loss": 4.158880710601807, "epoch": 4.07, "learning_rate": 3.292007138160985e-05, "loss": 106.94, "step": 4820, "task_loss": 2.387331008911133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9706226382518384, "compression/movement_sparsity/importance_threshold": -0.00020896405799870725, "compression/movement_sparsity/linear_layer_sparsity": 0.9042421609568029, "compression/movement_sparsity/model_sparsity": 0.873178660953676, "compression_loss": 103.05086517333984, "distillation_loss": 4.648735046386719, "epoch": 4.08, "learning_rate": 3.2915375223067534e-05, "loss": 108.0033, "step": 4821, "task_loss": 2.8789877891540527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9707029773916325, "compression/movement_sparsity/importance_threshold": -0.00020839259781070887, "compression/movement_sparsity/linear_layer_sparsity": 0.9042309760875604, "compression/movement_sparsity/model_sparsity": 0.8731678603191008, "compression_loss": 103.05873107910156, "distillation_loss": 3.484093189239502, "epoch": 4.08, "learning_rate": 3.291067906452522e-05, "loss": 106.6508, "step": 4822, "task_loss": 2.484426736831665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9707831699271976, "compression/movement_sparsity/importance_threshold": -0.00020782218043298266, "compression/movement_sparsity/linear_layer_sparsity": 0.9043532703508338, "compression/movement_sparsity/model_sparsity": 0.8732859533982096, "compression_loss": 103.06653594970703, "distillation_loss": 4.334480285644531, "epoch": 4.08, "learning_rate": 3.290598290598291e-05, "loss": 107.1241, "step": 4823, "task_loss": 2.383352756500244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9708632159924188, "compression/movement_sparsity/importance_threshold": -0.00020725280491319144, "compression/movement_sparsity/linear_layer_sparsity": 0.9043605560172593, "compression/movement_sparsity/model_sparsity": 0.87329298877958, "compression_loss": 103.07433319091797, "distillation_loss": 4.55523681640625, "epoch": 4.08, "learning_rate": 3.290128674744059e-05, "loss": 107.1511, "step": 4824, "task_loss": 2.0574839115142822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9709431157211814, "compression/movement_sparsity/importance_threshold": -0.00020668447029899544, "compression/movement_sparsity/linear_layer_sparsity": 0.9043997388321108, "compression/movement_sparsity/model_sparsity": 0.8733308255442008, "compression_loss": 103.08216094970703, "distillation_loss": 5.640742778778076, "epoch": 4.08, "learning_rate": 3.2896590588898286e-05, "loss": 108.0422, "step": 4825, "task_loss": 3.3931729793548584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9710228692473704, "compression/movement_sparsity/importance_threshold": -0.00020611717563805837, "compression/movement_sparsity/linear_layer_sparsity": 0.9044478528485215, "compression/movement_sparsity/model_sparsity": 0.8733772866961318, "compression_loss": 103.08992767333984, "distillation_loss": 6.397430419921875, "epoch": 4.08, "learning_rate": 3.2891894430355966e-05, "loss": 107.4103, "step": 4826, "task_loss": 3.450395107269287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.971102476704871, "compression/movement_sparsity/importance_threshold": -0.00020555091997804045, "compression/movement_sparsity/linear_layer_sparsity": 0.9045259799948716, "compression/movement_sparsity/model_sparsity": 0.8734527299346576, "compression_loss": 103.09773254394531, "distillation_loss": 3.7132699489593506, "epoch": 4.08, "learning_rate": 3.288719827181366e-05, "loss": 107.3559, "step": 4827, "task_loss": 1.2397663593292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9711819382275683, "compression/movement_sparsity/importance_threshold": -0.0002049857023666054, "compression/movement_sparsity/linear_layer_sparsity": 0.9045004503519632, "compression/movement_sparsity/model_sparsity": 0.8734280773135216, "compression_loss": 103.1054916381836, "distillation_loss": 3.0998220443725586, "epoch": 4.08, "learning_rate": 3.2882502113271346e-05, "loss": 107.0832, "step": 4828, "task_loss": 2.233264684677124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9712612539493475, "compression/movement_sparsity/importance_threshold": -0.00020442152185141255, "compression/movement_sparsity/linear_layer_sparsity": 0.9046374828864346, "compression/movement_sparsity/model_sparsity": 0.8735604023588724, "compression_loss": 103.11326599121094, "distillation_loss": 3.539904832839966, "epoch": 4.08, "learning_rate": 3.287780595472903e-05, "loss": 107.2518, "step": 4829, "task_loss": 2.107739210128784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9713404240040936, "compression/movement_sparsity/importance_threshold": -0.00020385837748012562, "compression/movement_sparsity/linear_layer_sparsity": 0.9046544152044775, "compression/movement_sparsity/model_sparsity": 0.8735767529997006, "compression_loss": 103.12102508544922, "distillation_loss": 2.9822254180908203, "epoch": 4.08, "learning_rate": 3.287310979618672e-05, "loss": 107.1362, "step": 4830, "task_loss": 2.0820131301879883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.971419448525692, "compression/movement_sparsity/importance_threshold": -0.0002032962683004057, "compression/movement_sparsity/linear_layer_sparsity": 0.9047751235534554, "compression/movement_sparsity/model_sparsity": 0.8736933146455488, "compression_loss": 103.12872314453125, "distillation_loss": 4.026538372039795, "epoch": 4.08, "learning_rate": 3.2868413637644405e-05, "loss": 107.2389, "step": 4831, "task_loss": 1.8716963529586792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9714983276480276, "compression/movement_sparsity/importance_threshold": -0.00020273519335991478, "compression/movement_sparsity/linear_layer_sparsity": 0.9048366641826241, "compression/movement_sparsity/model_sparsity": 0.8737527411647844, "compression_loss": 103.13644409179688, "distillation_loss": 6.264771938323975, "epoch": 4.08, "learning_rate": 3.28637174791021e-05, "loss": 107.8643, "step": 4832, "task_loss": 3.23293399810791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9715770615049857, "compression/movement_sparsity/importance_threshold": -0.00020217515170631393, "compression/movement_sparsity/linear_layer_sparsity": 0.9048883792976607, "compression/movement_sparsity/model_sparsity": 0.8738026797065254, "compression_loss": 103.14408874511719, "distillation_loss": 3.94463849067688, "epoch": 4.09, "learning_rate": 3.2859021320559784e-05, "loss": 107.7763, "step": 4833, "task_loss": 2.177802801132202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9716556502304512, "compression/movement_sparsity/importance_threshold": -0.00020161614238726772, "compression/movement_sparsity/linear_layer_sparsity": 0.9048763597366839, "compression/movement_sparsity/model_sparsity": 0.8737910730544445, "compression_loss": 103.15184783935547, "distillation_loss": 6.043858051300049, "epoch": 4.09, "learning_rate": 3.285432516201747e-05, "loss": 107.6529, "step": 4834, "task_loss": 2.6674282550811768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9717340939583096, "compression/movement_sparsity/importance_threshold": -0.00020105816445043553, "compression/movement_sparsity/linear_layer_sparsity": 0.9049773335882243, "compression/movement_sparsity/model_sparsity": 0.8738885781435526, "compression_loss": 103.15945434570312, "distillation_loss": 6.858139991760254, "epoch": 4.09, "learning_rate": 3.284962900347516e-05, "loss": 107.8779, "step": 4835, "task_loss": 3.2211005687713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9718123928224458, "compression/movement_sparsity/importance_threshold": -0.00020050121694347844, "compression/movement_sparsity/linear_layer_sparsity": 0.9050419029559725, "compression/movement_sparsity/model_sparsity": 0.8739509293548801, "compression_loss": 103.1671142578125, "distillation_loss": 2.6188666820526123, "epoch": 4.09, "learning_rate": 3.284493284493284e-05, "loss": 107.0104, "step": 4836, "task_loss": 2.8284921646118164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9718905469567449, "compression/movement_sparsity/importance_threshold": -0.00019994529891406102, "compression/movement_sparsity/linear_layer_sparsity": 0.9050826001401138, "compression/movement_sparsity/model_sparsity": 0.8739902284655469, "compression_loss": 103.17471313476562, "distillation_loss": 4.547994136810303, "epoch": 4.09, "learning_rate": 3.2840236686390536e-05, "loss": 107.8213, "step": 4837, "task_loss": 2.452706813812256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9719685564950922, "compression/movement_sparsity/importance_threshold": -0.00019939040940984438, "compression/movement_sparsity/linear_layer_sparsity": 0.905099556306492, "compression/movement_sparsity/model_sparsity": 0.8740066021354467, "compression_loss": 103.18231201171875, "distillation_loss": 4.795331001281738, "epoch": 4.09, "learning_rate": 3.283554052784822e-05, "loss": 107.463, "step": 4838, "task_loss": 3.0170867443084717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9720464215713728, "compression/movement_sparsity/importance_threshold": -0.00019883654747848787, "compression/movement_sparsity/linear_layer_sparsity": 0.9051548367476518, "compression/movement_sparsity/model_sparsity": 0.8740599835233902, "compression_loss": 103.1899642944336, "distillation_loss": 6.199403762817383, "epoch": 4.09, "learning_rate": 3.283084436930591e-05, "loss": 108.0203, "step": 4839, "task_loss": 3.600592613220215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9721241423194718, "compression/movement_sparsity/importance_threshold": -0.00019828371216765693, "compression/movement_sparsity/linear_layer_sparsity": 0.9051450112335199, "compression/movement_sparsity/model_sparsity": 0.8740504955458954, "compression_loss": 103.1975326538086, "distillation_loss": 5.257901668548584, "epoch": 4.09, "learning_rate": 3.2826148210763595e-05, "loss": 107.5942, "step": 4840, "task_loss": 2.657195806503296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9722017188732743, "compression/movement_sparsity/importance_threshold": -0.00019773190252501006, "compression/movement_sparsity/linear_layer_sparsity": 0.9052293985678789, "compression/movement_sparsity/model_sparsity": 0.8741319839157135, "compression_loss": 103.20520782470703, "distillation_loss": 5.570219993591309, "epoch": 4.09, "learning_rate": 3.282145205222129e-05, "loss": 107.9178, "step": 4841, "task_loss": 3.7689521312713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9722791513666655, "compression/movement_sparsity/importance_threshold": -0.00019718111759821182, "compression/movement_sparsity/linear_layer_sparsity": 0.9053042704164644, "compression/movement_sparsity/model_sparsity": 0.8742042836859674, "compression_loss": 103.21283721923828, "distillation_loss": 3.9910061359405518, "epoch": 4.09, "learning_rate": 3.2816755893678975e-05, "loss": 108.0466, "step": 4842, "task_loss": 1.9527497291564941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9723564399335306, "compression/movement_sparsity/importance_threshold": -0.0001966313564349233, "compression/movement_sparsity/linear_layer_sparsity": 0.905368303196669, "compression/movement_sparsity/model_sparsity": 0.874266116743184, "compression_loss": 103.22049713134766, "distillation_loss": 5.194499969482422, "epoch": 4.09, "learning_rate": 3.281205973513666e-05, "loss": 107.4968, "step": 4843, "task_loss": 3.802698850631714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9724335847077547, "compression/movement_sparsity/importance_threshold": -0.00019608261808280562, "compression/movement_sparsity/linear_layer_sparsity": 0.9054194817241621, "compression/movement_sparsity/model_sparsity": 0.8743155371308142, "compression_loss": 103.22807312011719, "distillation_loss": 3.2729411125183105, "epoch": 4.09, "learning_rate": 3.280736357659435e-05, "loss": 107.7342, "step": 4844, "task_loss": 1.4693323373794556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9725105858232228, "compression/movement_sparsity/importance_threshold": -0.0001955349015895216, "compression/movement_sparsity/linear_layer_sparsity": 0.9054175142365022, "compression/movement_sparsity/model_sparsity": 0.8743136372324081, "compression_loss": 103.23567962646484, "distillation_loss": 2.918156385421753, "epoch": 4.1, "learning_rate": 3.2802667418052034e-05, "loss": 106.8542, "step": 4845, "task_loss": 1.4343585968017578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9725874434138202, "compression/movement_sparsity/importance_threshold": -0.00019498820600273147, "compression/movement_sparsity/linear_layer_sparsity": 0.9054975373255065, "compression/movement_sparsity/model_sparsity": 0.8743909112821252, "compression_loss": 103.24325561523438, "distillation_loss": 3.763005256652832, "epoch": 4.1, "learning_rate": 3.279797125950973e-05, "loss": 108.06, "step": 4846, "task_loss": 3.1957905292510986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9726641576134321, "compression/movement_sparsity/importance_threshold": -0.00019444253037009893, "compression/movement_sparsity/linear_layer_sparsity": 0.9055586844571432, "compression/movement_sparsity/model_sparsity": 0.8744499578216797, "compression_loss": 103.25080108642578, "distillation_loss": 6.07440185546875, "epoch": 4.1, "learning_rate": 3.2793275100967406e-05, "loss": 108.3253, "step": 4847, "task_loss": 3.719710111618042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9727407285559434, "compression/movement_sparsity/importance_threshold": -0.00019389787373928508, "compression/movement_sparsity/linear_layer_sparsity": 0.9056225383748332, "compression/movement_sparsity/model_sparsity": 0.8745116181608594, "compression_loss": 103.25829315185547, "distillation_loss": 4.972978591918945, "epoch": 4.1, "learning_rate": 3.27885789424251e-05, "loss": 107.8371, "step": 4848, "task_loss": 2.3029229640960693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9728171563752394, "compression/movement_sparsity/importance_threshold": -0.0001933542351579519, "compression/movement_sparsity/linear_layer_sparsity": 0.9057261593915888, "compression/movement_sparsity/model_sparsity": 0.874611679476914, "compression_loss": 103.26583099365234, "distillation_loss": 4.359081268310547, "epoch": 4.1, "learning_rate": 3.2783882783882786e-05, "loss": 107.9087, "step": 4849, "task_loss": 1.5407847166061401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9728934412052053, "compression/movement_sparsity/importance_threshold": -0.00019281161367376044, "compression/movement_sparsity/linear_layer_sparsity": 0.9057720435886517, "compression/movement_sparsity/model_sparsity": 0.8746559874106514, "compression_loss": 103.27328491210938, "distillation_loss": 3.7044687271118164, "epoch": 4.1, "learning_rate": 3.277918662534047e-05, "loss": 107.5292, "step": 4850, "task_loss": 2.3496298789978027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.972969583179726, "compression/movement_sparsity/importance_threshold": -0.00019227000833437358, "compression/movement_sparsity/linear_layer_sparsity": 0.9058243668362379, "compression/movement_sparsity/model_sparsity": 0.8747065131937178, "compression_loss": 103.28071594238281, "distillation_loss": 3.697248935699463, "epoch": 4.1, "learning_rate": 3.2774490466798165e-05, "loss": 106.6944, "step": 4851, "task_loss": 1.3391684293746948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9730455824326869, "compression/movement_sparsity/importance_threshold": -0.00019172941818745327, "compression/movement_sparsity/linear_layer_sparsity": 0.9058421934668535, "compression/movement_sparsity/model_sparsity": 0.8747237274247307, "compression_loss": 103.28816986083984, "distillation_loss": 3.5745174884796143, "epoch": 4.1, "learning_rate": 3.2769794308255845e-05, "loss": 107.4355, "step": 4852, "task_loss": 2.022768259048462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.973121439097973, "compression/movement_sparsity/importance_threshold": -0.00019118984228066148, "compression/movement_sparsity/linear_layer_sparsity": 0.9058407387184019, "compression/movement_sparsity/model_sparsity": 0.8747223226513637, "compression_loss": 103.2955551147461, "distillation_loss": 4.045719146728516, "epoch": 4.1, "learning_rate": 3.276509814971354e-05, "loss": 108.3532, "step": 4853, "task_loss": 1.4291975498199463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9731971533094697, "compression/movement_sparsity/importance_threshold": -0.00019065127966165756, "compression/movement_sparsity/linear_layer_sparsity": 0.9059021243333913, "compression/movement_sparsity/model_sparsity": 0.874781599481634, "compression_loss": 103.30292510986328, "distillation_loss": 4.869051933288574, "epoch": 4.1, "learning_rate": 3.2760401991171224e-05, "loss": 107.7208, "step": 4854, "task_loss": 2.1336467266082764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9732727252010617, "compression/movement_sparsity/importance_threshold": -0.0001901137293781061, "compression/movement_sparsity/linear_layer_sparsity": 0.906022415336502, "compression/movement_sparsity/model_sparsity": 0.8748977581187294, "compression_loss": 103.31038665771484, "distillation_loss": 3.59769868850708, "epoch": 4.1, "learning_rate": 3.275570583262891e-05, "loss": 108.1108, "step": 4855, "task_loss": 1.9437288045883179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9733481549066343, "compression/movement_sparsity/importance_threshold": -0.00018957719047766903, "compression/movement_sparsity/linear_layer_sparsity": 0.9060772784317945, "compression/movement_sparsity/model_sparsity": 0.87495073649792, "compression_loss": 103.3177490234375, "distillation_loss": 5.469602584838867, "epoch": 4.1, "learning_rate": 3.27510096740866e-05, "loss": 107.4041, "step": 4856, "task_loss": 3.147029399871826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9734234425600728, "compression/movement_sparsity/importance_threshold": -0.0001890416620080066, "compression/movement_sparsity/linear_layer_sparsity": 0.9060920524754954, "compression/movement_sparsity/model_sparsity": 0.8749650030077695, "compression_loss": 103.32515716552734, "distillation_loss": 4.807843208312988, "epoch": 4.11, "learning_rate": 3.274631351554428e-05, "loss": 107.7106, "step": 4857, "task_loss": 3.04524302482605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9734985882952623, "compression/movement_sparsity/importance_threshold": -0.00018850714301678166, "compression/movement_sparsity/linear_layer_sparsity": 0.9060643884065801, "compression/movement_sparsity/model_sparsity": 0.8749382892847262, "compression_loss": 103.33256530761719, "distillation_loss": 5.792247772216797, "epoch": 4.11, "learning_rate": 3.2741617357001976e-05, "loss": 108.678, "step": 4858, "task_loss": 3.7883071899414062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9735735922460879, "compression/movement_sparsity/importance_threshold": -0.00018797363255165529, "compression/movement_sparsity/linear_layer_sparsity": 0.9061194303643872, "compression/movement_sparsity/model_sparsity": 0.8749914403819538, "compression_loss": 103.33988952636719, "distillation_loss": 4.527008056640625, "epoch": 4.11, "learning_rate": 3.273692119845966e-05, "loss": 108.0584, "step": 4859, "task_loss": 2.5692434310913086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9736484545464347, "compression/movement_sparsity/importance_threshold": -0.00018744112966028945, "compression/movement_sparsity/linear_layer_sparsity": 0.9061600917760256, "compression/movement_sparsity/model_sparsity": 0.8750307049490131, "compression_loss": 103.3472671508789, "distillation_loss": 6.009920120239258, "epoch": 4.11, "learning_rate": 3.273222503991735e-05, "loss": 108.3107, "step": 4860, "task_loss": 2.7547476291656494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9737231753301877, "compression/movement_sparsity/importance_threshold": -0.00018690963339034784, "compression/movement_sparsity/linear_layer_sparsity": 0.9061366369382858, "compression/movement_sparsity/model_sparsity": 0.8750080558571053, "compression_loss": 103.35454559326172, "distillation_loss": 5.003308296203613, "epoch": 4.11, "learning_rate": 3.2727528881375035e-05, "loss": 107.3145, "step": 4861, "task_loss": 3.731327772140503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9737977547312324, "compression/movement_sparsity/importance_threshold": -0.00018637914278948897, "compression/movement_sparsity/linear_layer_sparsity": 0.9061783596008437, "compression/movement_sparsity/model_sparsity": 0.8750483452178504, "compression_loss": 103.36185455322266, "distillation_loss": 5.352428436279297, "epoch": 4.11, "learning_rate": 3.272283272283272e-05, "loss": 107.5491, "step": 4862, "task_loss": 3.911506175994873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9738721928834537, "compression/movement_sparsity/importance_threshold": -0.0001858496569053774, "compression/movement_sparsity/linear_layer_sparsity": 0.9062396021258216, "compression/movement_sparsity/model_sparsity": 0.8751074838736911, "compression_loss": 103.36918640136719, "distillation_loss": 4.025177955627441, "epoch": 4.11, "learning_rate": 3.2718136564290415e-05, "loss": 107.7151, "step": 4863, "task_loss": 3.6999523639678955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9739464899207367, "compression/movement_sparsity/importance_threshold": -0.0001853211747856751, "compression/movement_sparsity/linear_layer_sparsity": 0.9062919134492401, "compression/movement_sparsity/model_sparsity": 0.8751579981422217, "compression_loss": 103.3764877319336, "distillation_loss": 3.726175308227539, "epoch": 4.11, "learning_rate": 3.2713440405748094e-05, "loss": 107.133, "step": 4864, "task_loss": 2.8486487865448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9740206459769667, "compression/movement_sparsity/importance_threshold": -0.00018479369547804228, "compression/movement_sparsity/linear_layer_sparsity": 0.9063169303529401, "compression/movement_sparsity/model_sparsity": 0.8751821556383187, "compression_loss": 103.3837890625, "distillation_loss": 4.721752643585205, "epoch": 4.11, "learning_rate": 3.270874424720579e-05, "loss": 107.8614, "step": 4865, "task_loss": 2.464449405670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9740946611860288, "compression/movement_sparsity/importance_threshold": -0.00018426721803014093, "compression/movement_sparsity/linear_layer_sparsity": 0.9063355082061167, "compression/movement_sparsity/model_sparsity": 0.8752000952850866, "compression_loss": 103.39105987548828, "distillation_loss": 4.396850109100342, "epoch": 4.11, "learning_rate": 3.2704048088663474e-05, "loss": 107.4389, "step": 4866, "task_loss": 2.054183006286621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9741685356818082, "compression/movement_sparsity/importance_threshold": -0.00018374174148963387, "compression/movement_sparsity/linear_layer_sparsity": 0.9063126495767588, "compression/movement_sparsity/model_sparsity": 0.8751780219199685, "compression_loss": 103.3982925415039, "distillation_loss": 4.323485374450684, "epoch": 4.11, "learning_rate": 3.269935193012117e-05, "loss": 107.5146, "step": 4867, "task_loss": 2.043098211288452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9742422695981897, "compression/movement_sparsity/importance_threshold": -0.00018321726490418307, "compression/movement_sparsity/linear_layer_sparsity": 0.9063724135049498, "compression/movement_sparsity/model_sparsity": 0.8752357327733707, "compression_loss": 103.40547943115234, "distillation_loss": 6.124298572540283, "epoch": 4.11, "learning_rate": 3.269465577157885e-05, "loss": 108.1661, "step": 4868, "task_loss": 3.7112035751342773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9743158630690588, "compression/movement_sparsity/importance_threshold": -0.00018269378732144962, "compression/movement_sparsity/linear_layer_sparsity": 0.9064608312079697, "compression/movement_sparsity/model_sparsity": 0.8753211130562872, "compression_loss": 103.41267395019531, "distillation_loss": 4.096841812133789, "epoch": 4.12, "learning_rate": 3.268995961303653e-05, "loss": 107.7176, "step": 4869, "task_loss": 1.1810988187789917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9743893162283006, "compression/movement_sparsity/importance_threshold": -0.00018217130778909463, "compression/movement_sparsity/linear_layer_sparsity": 0.906503948998141, "compression/movement_sparsity/model_sparsity": 0.8753627496177203, "compression_loss": 103.41981506347656, "distillation_loss": 4.687103748321533, "epoch": 4.12, "learning_rate": 3.2685263454494226e-05, "loss": 107.3766, "step": 4870, "task_loss": 2.8337979316711426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9744626292098001, "compression/movement_sparsity/importance_threshold": -0.00018164982535478266, "compression/movement_sparsity/linear_layer_sparsity": 0.9064920129063375, "compression/movement_sparsity/model_sparsity": 0.8753512235673899, "compression_loss": 103.427001953125, "distillation_loss": 4.161503791809082, "epoch": 4.12, "learning_rate": 3.268056729595191e-05, "loss": 107.6183, "step": 4871, "task_loss": 2.5879976749420166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9745358021474425, "compression/movement_sparsity/importance_threshold": -0.0001811293390661722, "compression/movement_sparsity/linear_layer_sparsity": 0.9064588160236392, "compression/movement_sparsity/model_sparsity": 0.875319167099738, "compression_loss": 103.4341049194336, "distillation_loss": 5.987800121307373, "epoch": 4.12, "learning_rate": 3.2675871137409605e-05, "loss": 108.7963, "step": 4872, "task_loss": 2.712656021118164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.974608835175113, "compression/movement_sparsity/importance_threshold": -0.00018060984797092783, "compression/movement_sparsity/linear_layer_sparsity": 0.9065138222089435, "compression/movement_sparsity/model_sparsity": 0.8753722836533582, "compression_loss": 103.44125366210938, "distillation_loss": 5.104811191558838, "epoch": 4.12, "learning_rate": 3.2671174978867285e-05, "loss": 108.171, "step": 4873, "task_loss": 2.6898844242095947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9746817284266966, "compression/movement_sparsity/importance_threshold": -0.00018009135111670977, "compression/movement_sparsity/linear_layer_sparsity": 0.9065810029694039, "compression/movement_sparsity/model_sparsity": 0.8754371565480246, "compression_loss": 103.44834899902344, "distillation_loss": 4.535988807678223, "epoch": 4.12, "learning_rate": 3.266647882032498e-05, "loss": 107.6259, "step": 4874, "task_loss": 3.2055318355560303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9747544820360787, "compression/movement_sparsity/importance_threshold": -0.00017957384755118, "compression/movement_sparsity/linear_layer_sparsity": 0.9066259809297263, "compression/movement_sparsity/model_sparsity": 0.8754805893770417, "compression_loss": 103.45539855957031, "distillation_loss": 4.126613616943359, "epoch": 4.12, "learning_rate": 3.2661782661782664e-05, "loss": 107.8037, "step": 4875, "task_loss": 2.0492770671844482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9748270961371442, "compression/movement_sparsity/importance_threshold": -0.00017905733632200133, "compression/movement_sparsity/linear_layer_sparsity": 0.906689131321526, "compression/movement_sparsity/model_sparsity": 0.8755415703586095, "compression_loss": 103.46248626708984, "distillation_loss": 3.5844597816467285, "epoch": 4.12, "learning_rate": 3.265708650324035e-05, "loss": 107.8612, "step": 4876, "task_loss": 1.0546350479125977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9748995708637783, "compression/movement_sparsity/importance_threshold": -0.00017854181647683487, "compression/movement_sparsity/linear_layer_sparsity": 0.9067327260784026, "compression/movement_sparsity/model_sparsity": 0.8755836675014743, "compression_loss": 103.4695816040039, "distillation_loss": 5.011507034301758, "epoch": 4.12, "learning_rate": 3.265239034469804e-05, "loss": 107.7929, "step": 4877, "task_loss": 3.8710036277770996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9749719063498662, "compression/movement_sparsity/importance_threshold": -0.0001780272870633426, "compression/movement_sparsity/linear_layer_sparsity": 0.9068098635188391, "compression/movement_sparsity/model_sparsity": 0.8756581550335293, "compression_loss": 103.47657775878906, "distillation_loss": 5.244514465332031, "epoch": 4.12, "learning_rate": 3.2647694186155723e-05, "loss": 108.0251, "step": 4878, "task_loss": 3.747767686843872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.975044102729293, "compression/movement_sparsity/importance_threshold": -0.00017751374712918645, "compression/movement_sparsity/linear_layer_sparsity": 0.9067929312007961, "compression/movement_sparsity/model_sparsity": 0.875641804392701, "compression_loss": 103.48361206054688, "distillation_loss": 4.006139755249023, "epoch": 4.12, "learning_rate": 3.2642998027613417e-05, "loss": 107.5155, "step": 4879, "task_loss": 1.6200509071350098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9751161601359437, "compression/movement_sparsity/importance_threshold": -0.0001770011957220284, "compression/movement_sparsity/linear_layer_sparsity": 0.9069140091989707, "compression/movement_sparsity/model_sparsity": 0.8757587229891588, "compression_loss": 103.4906005859375, "distillation_loss": 4.230749130249023, "epoch": 4.13, "learning_rate": 3.26383018690711e-05, "loss": 108.1157, "step": 4880, "task_loss": 1.8781242370605469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9751880787037037, "compression/movement_sparsity/importance_threshold": -0.00017648963188953044, "compression/movement_sparsity/linear_layer_sparsity": 0.9069939368946338, "compression/movement_sparsity/model_sparsity": 0.8758359049225896, "compression_loss": 103.49764251708984, "distillation_loss": 6.6205525398254395, "epoch": 4.13, "learning_rate": 3.263360571052879e-05, "loss": 109.2283, "step": 4881, "task_loss": 2.7544198036193848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.975259858566458, "compression/movement_sparsity/importance_threshold": -0.00017597905467935365, "compression/movement_sparsity/linear_layer_sparsity": 0.907008472454982, "compression/movement_sparsity/model_sparsity": 0.8758499411417231, "compression_loss": 103.50460052490234, "distillation_loss": 5.1475300788879395, "epoch": 4.13, "learning_rate": 3.2628909551986476e-05, "loss": 108.3674, "step": 4882, "task_loss": 3.4986281394958496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9753314998580918, "compression/movement_sparsity/importance_threshold": -0.00017546946313915998, "compression/movement_sparsity/linear_layer_sparsity": 0.9069700527868593, "compression/movement_sparsity/model_sparsity": 0.8758128413073931, "compression_loss": 103.51160430908203, "distillation_loss": 3.920711040496826, "epoch": 4.13, "learning_rate": 3.262421339344416e-05, "loss": 107.202, "step": 4883, "task_loss": 3.0582447052001953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9754030027124901, "compression/movement_sparsity/importance_threshold": -0.00017496085631661228, "compression/movement_sparsity/linear_layer_sparsity": 0.9069308103511696, "compression/movement_sparsity/model_sparsity": 0.8757749469700933, "compression_loss": 103.51854705810547, "distillation_loss": 5.017411231994629, "epoch": 4.13, "learning_rate": 3.2619517234901855e-05, "loss": 107.2064, "step": 4884, "task_loss": 3.110985517501831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9754743672635382, "compression/movement_sparsity/importance_threshold": -0.00017445323325937165, "compression/movement_sparsity/linear_layer_sparsity": 0.9069714836869756, "compression/movement_sparsity/model_sparsity": 0.8758142230516884, "compression_loss": 103.5255355834961, "distillation_loss": 2.981635332107544, "epoch": 4.13, "learning_rate": 3.261482107635954e-05, "loss": 106.796, "step": 4885, "task_loss": 0.6793882846832275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9755455936451212, "compression/movement_sparsity/importance_threshold": -0.00017394659301510004, "compression/movement_sparsity/linear_layer_sparsity": 0.9070385094332567, "compression/movement_sparsity/model_sparsity": 0.8758789462573896, "compression_loss": 103.532470703125, "distillation_loss": 4.924708366394043, "epoch": 4.13, "learning_rate": 3.261012491781723e-05, "loss": 108.167, "step": 4886, "task_loss": 3.057157278060913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9756166819911242, "compression/movement_sparsity/importance_threshold": -0.00017344093463145943, "compression/movement_sparsity/linear_layer_sparsity": 0.9071147287127852, "compression/movement_sparsity/model_sparsity": 0.8759525471701882, "compression_loss": 103.53934478759766, "distillation_loss": 4.1349945068359375, "epoch": 4.13, "learning_rate": 3.2605428759274914e-05, "loss": 107.9241, "step": 4887, "task_loss": 1.949236273765564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9756876324354323, "compression/movement_sparsity/importance_threshold": -0.00017293625715611178, "compression/movement_sparsity/linear_layer_sparsity": 0.9071166842762775, "compression/movement_sparsity/model_sparsity": 0.8759544355540586, "compression_loss": 103.54627227783203, "distillation_loss": 5.352811813354492, "epoch": 4.13, "learning_rate": 3.26007326007326e-05, "loss": 108.1402, "step": 4888, "task_loss": 2.799795627593994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9757584451119308, "compression/movement_sparsity/importance_threshold": -0.00017243255963671905, "compression/movement_sparsity/linear_layer_sparsity": 0.9071746834276583, "compression/movement_sparsity/model_sparsity": 0.8760104422561632, "compression_loss": 103.55311584472656, "distillation_loss": 4.772592067718506, "epoch": 4.13, "learning_rate": 3.2596036442190294e-05, "loss": 107.9542, "step": 4889, "task_loss": 1.7764564752578735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9758291201545047, "compression/movement_sparsity/importance_threshold": -0.00017192984112094148, "compression/movement_sparsity/linear_layer_sparsity": 0.9071606009856803, "compression/movement_sparsity/model_sparsity": 0.8759968435893899, "compression_loss": 103.55998992919922, "distillation_loss": 5.493152618408203, "epoch": 4.13, "learning_rate": 3.259134028364797e-05, "loss": 108.572, "step": 4890, "task_loss": 2.9396629333496094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9758996576970391, "compression/movement_sparsity/importance_threshold": -0.00017142810065644363, "compression/movement_sparsity/linear_layer_sparsity": 0.9071469835862401, "compression/movement_sparsity/model_sparsity": 0.8759836939895125, "compression_loss": 103.56684112548828, "distillation_loss": 3.7565512657165527, "epoch": 4.13, "learning_rate": 3.2586644125105666e-05, "loss": 108.2007, "step": 4891, "task_loss": 2.0404727458953857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9759700578734194, "compression/movement_sparsity/importance_threshold": -0.00017092733729088486, "compression/movement_sparsity/linear_layer_sparsity": 0.9071676600929207, "compression/movement_sparsity/model_sparsity": 0.8760036601945802, "compression_loss": 103.57373046875, "distillation_loss": 3.433934211730957, "epoch": 4.14, "learning_rate": 3.258194796656335e-05, "loss": 108.3461, "step": 4892, "task_loss": 1.904767394065857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9760403208175304, "compression/movement_sparsity/importance_threshold": -0.00017042755007192888, "compression/movement_sparsity/linear_layer_sparsity": 0.9072569959568486, "compression/movement_sparsity/model_sparsity": 0.8760899270967529, "compression_loss": 103.58061981201172, "distillation_loss": 4.807548522949219, "epoch": 4.14, "learning_rate": 3.257725180802104e-05, "loss": 108.1331, "step": 4893, "task_loss": 3.3066797256469727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9761104466632575, "compression/movement_sparsity/importance_threshold": -0.0001699287380472368, "compression/movement_sparsity/linear_layer_sparsity": 0.907282656765601, "compression/movement_sparsity/model_sparsity": 0.8761147063777828, "compression_loss": 103.58748626708984, "distillation_loss": 4.331366062164307, "epoch": 4.14, "learning_rate": 3.2572555649478725e-05, "loss": 107.9159, "step": 4894, "task_loss": 2.7471566200256348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9761804355444857, "compression/movement_sparsity/importance_threshold": -0.00016943090026446967, "compression/movement_sparsity/linear_layer_sparsity": 0.9072851846891398, "compression/movement_sparsity/model_sparsity": 0.8761171474593712, "compression_loss": 103.59430694580078, "distillation_loss": 4.636195182800293, "epoch": 4.14, "learning_rate": 3.256785949093641e-05, "loss": 107.9773, "step": 4895, "task_loss": 2.8655407428741455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9762502875951002, "compression/movement_sparsity/importance_threshold": -0.00016893403577129037, "compression/movement_sparsity/linear_layer_sparsity": 0.9074032458729024, "compression/movement_sparsity/model_sparsity": 0.876231152878273, "compression_loss": 103.60111999511719, "distillation_loss": 4.927591323852539, "epoch": 4.14, "learning_rate": 3.2563163332394105e-05, "loss": 108.0869, "step": 4896, "task_loss": 2.395859956741333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9763200029489862, "compression/movement_sparsity/importance_threshold": -0.00016843814361536084, "compression/movement_sparsity/linear_layer_sparsity": 0.9074520753393713, "compression/movement_sparsity/model_sparsity": 0.8762783049023516, "compression_loss": 103.60794830322266, "distillation_loss": 3.8379712104797363, "epoch": 4.14, "learning_rate": 3.255846717385179e-05, "loss": 108.13, "step": 4897, "task_loss": 2.8540122509002686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9763895817400288, "compression/movement_sparsity/importance_threshold": -0.0001679432228443422, "compression/movement_sparsity/linear_layer_sparsity": 0.9074806933416975, "compression/movement_sparsity/model_sparsity": 0.8763059397882585, "compression_loss": 103.61466979980469, "distillation_loss": 4.425966739654541, "epoch": 4.14, "learning_rate": 3.2553771015309484e-05, "loss": 107.6978, "step": 4898, "task_loss": 2.2052342891693115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9764590241021129, "compression/movement_sparsity/importance_threshold": -0.00016744927250589726, "compression/movement_sparsity/linear_layer_sparsity": 0.9075618015299566, "compression/movement_sparsity/model_sparsity": 0.876384261660733, "compression_loss": 103.6214828491211, "distillation_loss": 5.906187057495117, "epoch": 4.14, "learning_rate": 3.2549074856767164e-05, "loss": 108.7198, "step": 4899, "task_loss": 2.6822450160980225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9765283301691241, "compression/movement_sparsity/importance_threshold": -0.00016695629164768626, "compression/movement_sparsity/linear_layer_sparsity": 0.9076497661146063, "compression/movement_sparsity/model_sparsity": 0.8764692043912893, "compression_loss": 103.62818908691406, "distillation_loss": 4.0747785568237305, "epoch": 4.14, "learning_rate": 3.254437869822485e-05, "loss": 107.6278, "step": 4900, "task_loss": 1.9896405935287476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9765975000749472, "compression/movement_sparsity/importance_threshold": -0.0001664642793173729, "compression/movement_sparsity/linear_layer_sparsity": 0.9076850735749761, "compression/movement_sparsity/model_sparsity": 0.8765032989317769, "compression_loss": 103.63485717773438, "distillation_loss": 5.189642906188965, "epoch": 4.14, "learning_rate": 3.253968253968254e-05, "loss": 108.1773, "step": 4901, "task_loss": 3.233405351638794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9766665339534673, "compression/movement_sparsity/importance_threshold": -0.00016597323456261828, "compression/movement_sparsity/linear_layer_sparsity": 0.907730921999536, "compression/movement_sparsity/model_sparsity": 0.8765475723219069, "compression_loss": 103.64154815673828, "distillation_loss": 3.8432066440582275, "epoch": 4.14, "learning_rate": 3.253498638114023e-05, "loss": 107.8977, "step": 4902, "task_loss": 2.16770601272583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9767354319385698, "compression/movement_sparsity/importance_threshold": -0.0001654831564310835, "compression/movement_sparsity/linear_layer_sparsity": 0.9077907097760622, "compression/movement_sparsity/model_sparsity": 0.8766053062043808, "compression_loss": 103.648193359375, "distillation_loss": 5.514111042022705, "epoch": 4.14, "learning_rate": 3.2530290222597916e-05, "loss": 108.3053, "step": 4903, "task_loss": 2.6223304271698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9768041941641397, "compression/movement_sparsity/importance_threshold": -0.00016499404397043223, "compression/movement_sparsity/linear_layer_sparsity": 0.9078489354866282, "compression/movement_sparsity/model_sparsity": 0.8766615316826655, "compression_loss": 103.6548843383789, "distillation_loss": 4.038149833679199, "epoch": 4.15, "learning_rate": 3.25255940640556e-05, "loss": 107.6951, "step": 4904, "task_loss": 1.4446055889129639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9768728207640621, "compression/movement_sparsity/importance_threshold": -0.00016450589622832388, "compression/movement_sparsity/linear_layer_sparsity": 0.9078925063951696, "compression/movement_sparsity/model_sparsity": 0.8767036057964588, "compression_loss": 103.66153717041016, "distillation_loss": 2.796388626098633, "epoch": 4.15, "learning_rate": 3.2520897905513295e-05, "loss": 106.9285, "step": 4905, "task_loss": 1.7463316917419434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9769413118722222, "compression/movement_sparsity/importance_threshold": -0.000164018712252423, "compression/movement_sparsity/linear_layer_sparsity": 0.9078909085567064, "compression/movement_sparsity/model_sparsity": 0.8767020628486623, "compression_loss": 103.66819763183594, "distillation_loss": 6.893926620483398, "epoch": 4.15, "learning_rate": 3.251620174697098e-05, "loss": 108.4442, "step": 4906, "task_loss": 4.294881820678711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9770096676225052, "compression/movement_sparsity/importance_threshold": -0.00016353249109038893, "compression/movement_sparsity/linear_layer_sparsity": 0.9079378659288564, "compression/movement_sparsity/model_sparsity": 0.8767474070906212, "compression_loss": 103.67476654052734, "distillation_loss": 3.4626212120056152, "epoch": 4.15, "learning_rate": 3.251150558842866e-05, "loss": 106.9434, "step": 4907, "task_loss": 1.972983956336975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9770778881487961, "compression/movement_sparsity/importance_threshold": -0.0001630472317898854, "compression/movement_sparsity/linear_layer_sparsity": 0.9079747593035218, "compression/movement_sparsity/model_sparsity": 0.8767830330643696, "compression_loss": 103.68142700195312, "distillation_loss": 5.730908393859863, "epoch": 4.15, "learning_rate": 3.2506809429886354e-05, "loss": 108.2205, "step": 4908, "task_loss": 3.6958227157592773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9771459735849801, "compression/movement_sparsity/importance_threshold": -0.0001625629333985735, "compression/movement_sparsity/linear_layer_sparsity": 0.908040604557207, "compression/movement_sparsity/model_sparsity": 0.876846616331027, "compression_loss": 103.68802642822266, "distillation_loss": 4.230669975280762, "epoch": 4.15, "learning_rate": 3.250211327134404e-05, "loss": 107.889, "step": 4909, "task_loss": 2.812018632888794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9772139240649425, "compression/movement_sparsity/importance_threshold": -0.0001620795949641143, "compression/movement_sparsity/linear_layer_sparsity": 0.9080377666053097, "compression/movement_sparsity/model_sparsity": 0.8768438758715079, "compression_loss": 103.69454193115234, "distillation_loss": 5.519952297210693, "epoch": 4.15, "learning_rate": 3.2497417112801734e-05, "loss": 108.6161, "step": 4910, "task_loss": 3.1602208614349365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9772817397225682, "compression/movement_sparsity/importance_threshold": -0.00016159721553417154, "compression/movement_sparsity/linear_layer_sparsity": 0.9080833169256787, "compression/movement_sparsity/model_sparsity": 0.876887861398243, "compression_loss": 103.70116424560547, "distillation_loss": 4.393587112426758, "epoch": 4.15, "learning_rate": 3.249272095425941e-05, "loss": 108.1768, "step": 4911, "task_loss": 2.930288314819336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9773494206917425, "compression/movement_sparsity/importance_threshold": -0.00016111579415640457, "compression/movement_sparsity/linear_layer_sparsity": 0.9080943825532447, "compression/movement_sparsity/model_sparsity": 0.8768985468874604, "compression_loss": 103.70767211914062, "distillation_loss": 4.48351526260376, "epoch": 4.15, "learning_rate": 3.2488024795717106e-05, "loss": 108.23, "step": 4912, "task_loss": 2.446298122406006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9774169671063503, "compression/movement_sparsity/importance_threshold": -0.00016063532987847795, "compression/movement_sparsity/linear_layer_sparsity": 0.9081581887742642, "compression/movement_sparsity/model_sparsity": 0.876960161168497, "compression_loss": 103.71420288085938, "distillation_loss": 5.4873270988464355, "epoch": 4.15, "learning_rate": 3.248332863717479e-05, "loss": 108.1472, "step": 4913, "task_loss": 3.1935336589813232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9774843791002771, "compression/movement_sparsity/importance_threshold": -0.00016015582174805192, "compression/movement_sparsity/linear_layer_sparsity": 0.9081395513002494, "compression/movement_sparsity/model_sparsity": 0.8769421639490501, "compression_loss": 103.72074890136719, "distillation_loss": 6.615564346313477, "epoch": 4.15, "learning_rate": 3.247863247863248e-05, "loss": 108.4739, "step": 4914, "task_loss": 2.786162853240967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9775516568074077, "compression/movement_sparsity/importance_threshold": -0.00015967726881278843, "compression/movement_sparsity/linear_layer_sparsity": 0.908230830803502, "compression/movement_sparsity/model_sparsity": 0.8770303077205572, "compression_loss": 103.72720336914062, "distillation_loss": 3.684499502182007, "epoch": 4.15, "learning_rate": 3.247393632009017e-05, "loss": 107.7523, "step": 4915, "task_loss": 1.4617326259613037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9776188003616275, "compression/movement_sparsity/importance_threshold": -0.00015919967012034946, "compression/movement_sparsity/linear_layer_sparsity": 0.9082855865812858, "compression/movement_sparsity/model_sparsity": 0.8770831824689258, "compression_loss": 103.73367309570312, "distillation_loss": 6.725152969360352, "epoch": 4.16, "learning_rate": 3.246924016154785e-05, "loss": 108.8414, "step": 4916, "task_loss": 4.39544153213501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9776858098968216, "compression/movement_sparsity/importance_threshold": -0.00015872302471839697, "compression/movement_sparsity/linear_layer_sparsity": 0.9083400919515493, "compression/movement_sparsity/model_sparsity": 0.8771358154120427, "compression_loss": 103.74011993408203, "distillation_loss": 4.617304801940918, "epoch": 4.16, "learning_rate": 3.2464544003005545e-05, "loss": 108.0824, "step": 4917, "task_loss": 2.6728384494781494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977752685546875, "compression/movement_sparsity/importance_threshold": -0.00015824733165459293, "compression/movement_sparsity/linear_layer_sparsity": 0.9083496909064962, "compression/movement_sparsity/model_sparsity": 0.8771450846133573, "compression_loss": 103.74655151367188, "distillation_loss": 3.7366747856140137, "epoch": 4.16, "learning_rate": 3.245984784446323e-05, "loss": 107.7426, "step": 4918, "task_loss": 2.8922839164733887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977819427445673, "compression/movement_sparsity/importance_threshold": -0.00015777258997659842, "compression/movement_sparsity/linear_layer_sparsity": 0.9083941919001133, "compression/movement_sparsity/model_sparsity": 0.8771880568609425, "compression_loss": 103.75296020507812, "distillation_loss": 3.8784046173095703, "epoch": 4.16, "learning_rate": 3.245515168592092e-05, "loss": 108.3962, "step": 4919, "task_loss": 1.7650163173675537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9778860357271005, "compression/movement_sparsity/importance_threshold": -0.00015729879873207717, "compression/movement_sparsity/linear_layer_sparsity": 0.9084228099024393, "compression/movement_sparsity/model_sparsity": 0.8772156917468494, "compression_loss": 103.7593994140625, "distillation_loss": 4.6056365966796875, "epoch": 4.16, "learning_rate": 3.2450455527378604e-05, "loss": 107.6049, "step": 4920, "task_loss": 2.6822972297668457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977952510525043, "compression/movement_sparsity/importance_threshold": -0.00015682595696868938, "compression/movement_sparsity/linear_layer_sparsity": 0.9083461255803731, "compression/movement_sparsity/model_sparsity": 0.8771416417671547, "compression_loss": 103.7657699584961, "distillation_loss": 2.9717354774475098, "epoch": 4.16, "learning_rate": 3.244575936883629e-05, "loss": 107.6581, "step": 4921, "task_loss": 2.7596170902252197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9780188519733853, "compression/movement_sparsity/importance_threshold": -0.00015635406373409617, "compression/movement_sparsity/linear_layer_sparsity": 0.9084007621164806, "compression/movement_sparsity/model_sparsity": 0.8771944013701652, "compression_loss": 103.77218627929688, "distillation_loss": 5.1159868240356445, "epoch": 4.16, "learning_rate": 3.2441063210293983e-05, "loss": 108.8533, "step": 4922, "task_loss": 3.052698850631714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9780850602060127, "compression/movement_sparsity/importance_threshold": -0.0001558831180759621, "compression/movement_sparsity/linear_layer_sparsity": 0.9083901615314524, "compression/movement_sparsity/model_sparsity": 0.8771841649478439, "compression_loss": 103.77848815917969, "distillation_loss": 3.9110641479492188, "epoch": 4.16, "learning_rate": 3.243636705175167e-05, "loss": 107.6174, "step": 4923, "task_loss": 2.5485146045684814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9781511353568103, "compression/movement_sparsity/importance_threshold": -0.00015541311904194739, "compression/movement_sparsity/linear_layer_sparsity": 0.9084269237402737, "compression/movement_sparsity/model_sparsity": 0.8772196642616985, "compression_loss": 103.78487396240234, "distillation_loss": 4.208843231201172, "epoch": 4.16, "learning_rate": 3.2431670893209356e-05, "loss": 108.2742, "step": 4924, "task_loss": 1.8222496509552002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9782170775596634, "compression/movement_sparsity/importance_threshold": -0.00015494406567971315, "compression/movement_sparsity/linear_layer_sparsity": 0.9084394441162914, "compression/movement_sparsity/model_sparsity": 0.8772317545242828, "compression_loss": 103.79117584228516, "distillation_loss": 4.294963836669922, "epoch": 4.16, "learning_rate": 3.242697473466704e-05, "loss": 107.7284, "step": 4925, "task_loss": 1.9331204891204834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9782828869484569, "compression/movement_sparsity/importance_threshold": -0.0001544759570369222, "compression/movement_sparsity/linear_layer_sparsity": 0.908431168743952, "compression/movement_sparsity/model_sparsity": 0.8772237634364414, "compression_loss": 103.79747772216797, "distillation_loss": 5.264091491699219, "epoch": 4.16, "learning_rate": 3.242227857612473e-05, "loss": 108.3009, "step": 4926, "task_loss": 2.5773160457611084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9783485636570761, "compression/movement_sparsity/importance_threshold": -0.00015400879216123566, "compression/movement_sparsity/linear_layer_sparsity": 0.9084787819453221, "compression/movement_sparsity/model_sparsity": 0.8772697409778689, "compression_loss": 103.80384063720703, "distillation_loss": 5.870629787445068, "epoch": 4.16, "learning_rate": 3.241758241758242e-05, "loss": 108.1415, "step": 4927, "task_loss": 2.3657724857330322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9784141078194061, "compression/movement_sparsity/importance_threshold": -0.00015354257010031635, "compression/movement_sparsity/linear_layer_sparsity": 0.9084631851340543, "compression/movement_sparsity/model_sparsity": 0.8772546799650497, "compression_loss": 103.8101577758789, "distillation_loss": 3.877267837524414, "epoch": 4.17, "learning_rate": 3.241288625904011e-05, "loss": 107.5326, "step": 4928, "task_loss": 2.038069486618042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9784795195693319, "compression/movement_sparsity/importance_threshold": -0.0001530772899018271, "compression/movement_sparsity/linear_layer_sparsity": 0.9085282910893462, "compression/movement_sparsity/model_sparsity": 0.8773175493304879, "compression_loss": 103.81647491455078, "distillation_loss": 3.751628875732422, "epoch": 4.17, "learning_rate": 3.2408190100497795e-05, "loss": 108.1379, "step": 4929, "task_loss": 2.264565944671631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9785447990407389, "compression/movement_sparsity/importance_threshold": -0.0001526129506134264, "compression/movement_sparsity/linear_layer_sparsity": 0.9085630500380047, "compression/movement_sparsity/model_sparsity": 0.877351114202329, "compression_loss": 103.82276916503906, "distillation_loss": 3.923480749130249, "epoch": 4.17, "learning_rate": 3.240349394195548e-05, "loss": 108.6602, "step": 4930, "task_loss": 2.503856897354126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9786099463675121, "compression/movement_sparsity/importance_threshold": -0.00015214955128277884, "compression/movement_sparsity/linear_layer_sparsity": 0.9085827249146039, "compression/movement_sparsity/model_sparsity": 0.87737011318639, "compression_loss": 103.82901000976562, "distillation_loss": 4.7201104164123535, "epoch": 4.17, "learning_rate": 3.239879778341317e-05, "loss": 108.3205, "step": 4931, "task_loss": 2.9924094676971436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9786749616835365, "compression/movement_sparsity/importance_threshold": -0.00015168709095754636, "compression/movement_sparsity/linear_layer_sparsity": 0.9086577637015364, "compression/movement_sparsity/model_sparsity": 0.877442574160145, "compression_loss": 103.83527374267578, "distillation_loss": 5.025539398193359, "epoch": 4.17, "learning_rate": 3.239410162487086e-05, "loss": 108.8731, "step": 4932, "task_loss": 3.5230531692504883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9787398451226975, "compression/movement_sparsity/importance_threshold": -0.0001512255686853892, "compression/movement_sparsity/linear_layer_sparsity": 0.9086839014769942, "compression/movement_sparsity/model_sparsity": 0.8774678140226067, "compression_loss": 103.84144592285156, "distillation_loss": 4.620006561279297, "epoch": 4.17, "learning_rate": 3.238940546632854e-05, "loss": 107.7322, "step": 4933, "task_loss": 2.403836727142334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9788045968188802, "compression/movement_sparsity/importance_threshold": -0.00015076498351396933, "compression/movement_sparsity/linear_layer_sparsity": 0.9087113747592273, "compression/movement_sparsity/model_sparsity": 0.8774943435130773, "compression_loss": 103.84771728515625, "distillation_loss": 4.142625331878662, "epoch": 4.17, "learning_rate": 3.238470930778623e-05, "loss": 107.2989, "step": 4934, "task_loss": 1.838801383972168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9788692169059696, "compression/movement_sparsity/importance_threshold": -0.00015030533449094958, "compression/movement_sparsity/linear_layer_sparsity": 0.9087451797744749, "compression/movement_sparsity/model_sparsity": 0.8775269872220548, "compression_loss": 103.85389709472656, "distillation_loss": 6.114116668701172, "epoch": 4.17, "learning_rate": 3.238001314924392e-05, "loss": 108.2778, "step": 4935, "task_loss": 3.167956829071045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.978933705517851, "compression/movement_sparsity/importance_threshold": -0.0001498466206639919, "compression/movement_sparsity/linear_layer_sparsity": 0.9088115258432008, "compression/movement_sparsity/model_sparsity": 0.8775910540992157, "compression_loss": 103.86007690429688, "distillation_loss": 3.462712287902832, "epoch": 4.17, "learning_rate": 3.237531699070161e-05, "loss": 107.8409, "step": 4936, "task_loss": 1.5385490655899048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9789980627884093, "compression/movement_sparsity/importance_threshold": -0.00014938884108075742, "compression/movement_sparsity/linear_layer_sparsity": 0.9088716594205885, "compression/movement_sparsity/model_sparsity": 0.8776491219032275, "compression_loss": 103.86625671386719, "distillation_loss": 4.616709232330322, "epoch": 4.17, "learning_rate": 3.237062083215929e-05, "loss": 107.1366, "step": 4937, "task_loss": 2.9715538024902344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9790622888515299, "compression/movement_sparsity/importance_threshold": -0.00014893199478890808, "compression/movement_sparsity/linear_layer_sparsity": 0.9089175436176513, "compression/movement_sparsity/model_sparsity": 0.8776934298369649, "compression_loss": 103.87237548828125, "distillation_loss": 4.441533088684082, "epoch": 4.17, "learning_rate": 3.2365924673616985e-05, "loss": 108.2958, "step": 4938, "task_loss": 2.178976058959961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9791263838410977, "compression/movement_sparsity/importance_threshold": -0.00014847608083610672, "compression/movement_sparsity/linear_layer_sparsity": 0.9089667188849816, "compression/movement_sparsity/model_sparsity": 0.8777409157825816, "compression_loss": 103.87853240966797, "distillation_loss": 4.570040702819824, "epoch": 4.17, "learning_rate": 3.236122851507467e-05, "loss": 107.794, "step": 4939, "task_loss": 1.955387830734253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9791903478909981, "compression/movement_sparsity/importance_threshold": -0.00014802109827001444, "compression/movement_sparsity/linear_layer_sparsity": 0.9090445836996438, "compression/movement_sparsity/model_sparsity": 0.87781610570132, "compression_loss": 103.88465118408203, "distillation_loss": 3.3570470809936523, "epoch": 4.18, "learning_rate": 3.235653235653236e-05, "loss": 107.5309, "step": 4940, "task_loss": 1.2263967990875244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9792541811351161, "compression/movement_sparsity/importance_threshold": -0.00014756704613829233, "compression/movement_sparsity/linear_layer_sparsity": 0.9090641870312371, "compression/movement_sparsity/model_sparsity": 0.8778350355981661, "compression_loss": 103.8907470703125, "distillation_loss": 5.514569282531738, "epoch": 4.18, "learning_rate": 3.2351836197990044e-05, "loss": 107.7892, "step": 4941, "task_loss": 2.436563491821289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9793178837073369, "compression/movement_sparsity/importance_threshold": -0.00014711392348860323, "compression/movement_sparsity/linear_layer_sparsity": 0.9090293207650698, "compression/movement_sparsity/model_sparsity": 0.8778013670955029, "compression_loss": 103.89678192138672, "distillation_loss": 4.355862617492676, "epoch": 4.18, "learning_rate": 3.234714003944773e-05, "loss": 107.5107, "step": 4942, "task_loss": 2.5372984409332275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9793814557415456, "compression/movement_sparsity/importance_threshold": -0.00014666172936860823, "compression/movement_sparsity/linear_layer_sparsity": 0.9090797480700019, "compression/movement_sparsity/model_sparsity": 0.877850062067378, "compression_loss": 103.90287017822266, "distillation_loss": 4.892314910888672, "epoch": 4.18, "learning_rate": 3.2342443880905424e-05, "loss": 108.9864, "step": 4943, "task_loss": 3.011138677597046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9794448973716272, "compression/movement_sparsity/importance_threshold": -0.00014621046282597017, "compression/movement_sparsity/linear_layer_sparsity": 0.9090922803701872, "compression/movement_sparsity/model_sparsity": 0.8778621638444981, "compression_loss": 103.90889739990234, "distillation_loss": 4.778339862823486, "epoch": 4.18, "learning_rate": 3.233774772236311e-05, "loss": 108.2872, "step": 4944, "task_loss": 2.3556268215179443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.979508208731467, "compression/movement_sparsity/importance_threshold": -0.00014576012290835102, "compression/movement_sparsity/linear_layer_sparsity": 0.9091201590741199, "compression/movement_sparsity/model_sparsity": 0.8778890848291857, "compression_loss": 103.91493225097656, "distillation_loss": 5.5772247314453125, "epoch": 4.18, "learning_rate": 3.2333051563820796e-05, "loss": 107.8847, "step": 4945, "task_loss": 3.5369229316711426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9795713899549502, "compression/movement_sparsity/importance_threshold": -0.000145310708663411, "compression/movement_sparsity/linear_layer_sparsity": 0.9091978211779322, "compression/movement_sparsity/model_sparsity": 0.8779640790008155, "compression_loss": 103.92103576660156, "distillation_loss": 5.412211894989014, "epoch": 4.18, "learning_rate": 3.232835540527848e-05, "loss": 108.2152, "step": 4946, "task_loss": 2.033102512359619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9796344411759619, "compression/movement_sparsity/importance_threshold": -0.00014486221913881294, "compression/movement_sparsity/linear_layer_sparsity": 0.9093142487507289, "compression/movement_sparsity/model_sparsity": 0.8780765069283134, "compression_loss": 103.92698669433594, "distillation_loss": 5.664648056030273, "epoch": 4.18, "learning_rate": 3.232365924673617e-05, "loss": 107.9189, "step": 4947, "task_loss": 2.568807363510132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9796973625283871, "compression/movement_sparsity/importance_threshold": -0.00014441465338221968, "compression/movement_sparsity/linear_layer_sparsity": 0.9092860361701024, "compression/movement_sparsity/model_sparsity": 0.8780492635366236, "compression_loss": 103.93302917480469, "distillation_loss": 4.144012451171875, "epoch": 4.18, "learning_rate": 3.231896308819386e-05, "loss": 108.5346, "step": 4948, "task_loss": 2.407966136932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9797601541461111, "compression/movement_sparsity/importance_threshold": -0.00014396801044129146, "compression/movement_sparsity/linear_layer_sparsity": 0.9093436656722865, "compression/movement_sparsity/model_sparsity": 0.8781049132881186, "compression_loss": 103.93902587890625, "distillation_loss": 3.9243597984313965, "epoch": 4.18, "learning_rate": 3.231426692965155e-05, "loss": 108.673, "step": 4949, "task_loss": 2.823547601699829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9798228161630189, "compression/movement_sparsity/importance_threshold": -0.0001435222893636911, "compression/movement_sparsity/linear_layer_sparsity": 0.9093915769778473, "compression/movement_sparsity/model_sparsity": 0.878151178692941, "compression_loss": 103.94502258300781, "distillation_loss": 5.393031120300293, "epoch": 4.18, "learning_rate": 3.2309570771109235e-05, "loss": 108.2912, "step": 4950, "task_loss": 2.6801705360412598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9798853487129957, "compression/movement_sparsity/importance_threshold": -0.00014307748919708055, "compression/movement_sparsity/linear_layer_sparsity": 0.9095408437083131, "compression/movement_sparsity/model_sparsity": 0.8782953176520171, "compression_loss": 103.9510269165039, "distillation_loss": 3.9873499870300293, "epoch": 4.19, "learning_rate": 3.230487461256692e-05, "loss": 108.8051, "step": 4951, "task_loss": 2.5692694187164307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9799477519299268, "compression/movement_sparsity/importance_threshold": -0.00014263360898912093, "compression/movement_sparsity/linear_layer_sparsity": 0.9096540398316804, "compression/movement_sparsity/model_sparsity": 0.8784046251403147, "compression_loss": 103.95695495605469, "distillation_loss": 5.185629844665527, "epoch": 4.19, "learning_rate": 3.230017845402461e-05, "loss": 108.7475, "step": 4952, "task_loss": 2.4642436504364014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9800100259476971, "compression/movement_sparsity/importance_threshold": -0.0001421906477874742, "compression/movement_sparsity/linear_layer_sparsity": 0.9096846014733311, "compression/movement_sparsity/model_sparsity": 0.878434136895556, "compression_loss": 103.96292114257812, "distillation_loss": 3.303194046020508, "epoch": 4.19, "learning_rate": 3.22954822954823e-05, "loss": 107.971, "step": 4953, "task_loss": 1.3737921714782715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9800721709001919, "compression/movement_sparsity/importance_threshold": -0.00014174860463980234, "compression/movement_sparsity/linear_layer_sparsity": 0.9096935922957285, "compression/movement_sparsity/model_sparsity": 0.8784428188555452, "compression_loss": 103.9688720703125, "distillation_loss": 5.887929916381836, "epoch": 4.19, "learning_rate": 3.229078613693998e-05, "loss": 108.7878, "step": 4954, "task_loss": 2.9530484676361084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9801341869212963, "compression/movement_sparsity/importance_threshold": -0.00014130747859376729, "compression/movement_sparsity/linear_layer_sparsity": 0.909712372859755, "compression/movement_sparsity/model_sparsity": 0.8784609542494216, "compression_loss": 103.9747085571289, "distillation_loss": 5.104334831237793, "epoch": 4.19, "learning_rate": 3.228608997839767e-05, "loss": 108.6309, "step": 4955, "task_loss": 3.2497003078460693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9801960741448954, "compression/movement_sparsity/importance_threshold": -0.00014086726869703102, "compression/movement_sparsity/linear_layer_sparsity": 0.9097204216729092, "compression/movement_sparsity/model_sparsity": 0.8784687265610829, "compression_loss": 103.98062133789062, "distillation_loss": 2.934647798538208, "epoch": 4.19, "learning_rate": 3.228139381985536e-05, "loss": 107.6752, "step": 4956, "task_loss": 1.3857160806655884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9802578327048743, "compression/movement_sparsity/importance_threshold": -0.0001404279739972555, "compression/movement_sparsity/linear_layer_sparsity": 0.9097164866975894, "compression/movement_sparsity/model_sparsity": 0.8784649267642707, "compression_loss": 103.9864501953125, "distillation_loss": 6.115760803222656, "epoch": 4.19, "learning_rate": 3.2276697661313046e-05, "loss": 109.1379, "step": 4957, "task_loss": 2.5139880180358887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9803194627351183, "compression/movement_sparsity/importance_threshold": -0.0001399895935421027, "compression/movement_sparsity/linear_layer_sparsity": 0.9097186926686021, "compression/movement_sparsity/model_sparsity": 0.8784670569533927, "compression_loss": 103.9923095703125, "distillation_loss": 5.4551825523376465, "epoch": 4.19, "learning_rate": 3.227200150277074e-05, "loss": 108.7586, "step": 4958, "task_loss": 2.6156771183013916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9803809643695124, "compression/movement_sparsity/importance_threshold": -0.00013955212637923372, "compression/movement_sparsity/linear_layer_sparsity": 0.9097799471177475, "compression/movement_sparsity/model_sparsity": 0.8785262071237693, "compression_loss": 103.9981918334961, "distillation_loss": 4.7243266105651855, "epoch": 4.19, "learning_rate": 3.226730534422842e-05, "loss": 108.8952, "step": 4959, "task_loss": 2.482116460800171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9804423377419418, "compression/movement_sparsity/importance_threshold": -0.0001391155715563105, "compression/movement_sparsity/linear_layer_sparsity": 0.9098341424596524, "compression/movement_sparsity/model_sparsity": 0.8785785406889555, "compression_loss": 104.00402069091797, "distillation_loss": 4.419104099273682, "epoch": 4.19, "learning_rate": 3.226260918568611e-05, "loss": 108.0099, "step": 4960, "task_loss": 1.7853549718856812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9805035829862916, "compression/movement_sparsity/importance_threshold": -0.00013867992812099592, "compression/movement_sparsity/linear_layer_sparsity": 0.9098719659193933, "compression/movement_sparsity/model_sparsity": 0.8786150647964958, "compression_loss": 104.00981140136719, "distillation_loss": 3.44334077835083, "epoch": 4.19, "learning_rate": 3.22579130271438e-05, "loss": 108.1157, "step": 4961, "task_loss": 2.246476173400879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980564700236447, "compression/movement_sparsity/importance_threshold": -0.00013824519512095016, "compression/movement_sparsity/linear_layer_sparsity": 0.9099699587290249, "compression/movement_sparsity/model_sparsity": 0.8787096912516552, "compression_loss": 104.015625, "distillation_loss": 3.68894100189209, "epoch": 4.19, "learning_rate": 3.225321686860149e-05, "loss": 107.703, "step": 4962, "task_loss": 2.5273430347442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9806256896262932, "compression/movement_sparsity/importance_threshold": -0.00013781137160383608, "compression/movement_sparsity/linear_layer_sparsity": 0.910067569965292, "compression/movement_sparsity/model_sparsity": 0.8788039492416694, "compression_loss": 104.02140808105469, "distillation_loss": 4.087213516235352, "epoch": 4.2, "learning_rate": 3.224852071005917e-05, "loss": 108.0657, "step": 4963, "task_loss": 2.5195515155792236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980686551289715, "compression/movement_sparsity/importance_threshold": -0.0001373784566173165, "compression/movement_sparsity/linear_layer_sparsity": 0.9101120828830768, "compression/movement_sparsity/model_sparsity": 0.8788469330037904, "compression_loss": 104.02716827392578, "distillation_loss": 3.690401077270508, "epoch": 4.2, "learning_rate": 3.224382455151686e-05, "loss": 108.3931, "step": 4964, "task_loss": 2.469305992126465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980747285360598, "compression/movement_sparsity/importance_threshold": -0.00013694644920905167, "compression/movement_sparsity/linear_layer_sparsity": 0.9101368493792564, "compression/movement_sparsity/model_sparsity": 0.8788708486946356, "compression_loss": 104.032958984375, "distillation_loss": 3.0549135208129883, "epoch": 4.2, "learning_rate": 3.223912839297455e-05, "loss": 108.291, "step": 4965, "task_loss": 1.8945186138153076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980807891972827, "compression/movement_sparsity/importance_threshold": -0.0001365153484267044, "compression/movement_sparsity/linear_layer_sparsity": 0.9102259706081669, "compression/movement_sparsity/model_sparsity": 0.878956908335164, "compression_loss": 104.03868103027344, "distillation_loss": 4.572600364685059, "epoch": 4.2, "learning_rate": 3.2234432234432237e-05, "loss": 108.4649, "step": 4966, "task_loss": 2.5164425373077393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9808683712602874, "compression/movement_sparsity/importance_threshold": -0.0001360851533179358, "compression/movement_sparsity/linear_layer_sparsity": 0.9102888944407813, "compression/movement_sparsity/model_sparsity": 0.8790176705405518, "compression_loss": 104.04444122314453, "distillation_loss": 4.196561813354492, "epoch": 4.2, "learning_rate": 3.222973607588992e-05, "loss": 108.7202, "step": 4967, "task_loss": 2.636145830154419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9809287233568642, "compression/movement_sparsity/importance_threshold": -0.00013565586293040784, "compression/movement_sparsity/linear_layer_sparsity": 0.9103116815251334, "compression/movement_sparsity/model_sparsity": 0.8790396748184552, "compression_loss": 104.0501480102539, "distillation_loss": 5.587145805358887, "epoch": 4.2, "learning_rate": 3.222503991734761e-05, "loss": 108.6084, "step": 4968, "task_loss": 3.1466548442840576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9809889483964425, "compression/movement_sparsity/importance_threshold": -0.00013522747631178333, "compression/movement_sparsity/linear_layer_sparsity": 0.9103279937864593, "compression/movement_sparsity/model_sparsity": 0.8790554267034222, "compression_loss": 104.05583953857422, "distillation_loss": 5.2652058601379395, "epoch": 4.2, "learning_rate": 3.22203437588053e-05, "loss": 108.7771, "step": 4969, "task_loss": 2.5573723316192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9810490465129075, "compression/movement_sparsity/importance_threshold": -0.00013479999250972253, "compression/movement_sparsity/linear_layer_sparsity": 0.9103244165361686, "compression/movement_sparsity/model_sparsity": 0.8790519723426837, "compression_loss": 104.06156158447266, "distillation_loss": 4.762996673583984, "epoch": 4.2, "learning_rate": 3.221564760026299e-05, "loss": 109.5789, "step": 4970, "task_loss": 3.6615588665008545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9811090178401443, "compression/movement_sparsity/importance_threshold": -0.00013437341057188912, "compression/movement_sparsity/linear_layer_sparsity": 0.9104037837959529, "compression/movement_sparsity/model_sparsity": 0.8791286130929322, "compression_loss": 104.06730651855469, "distillation_loss": 3.8798813819885254, "epoch": 4.2, "learning_rate": 3.221095144172067e-05, "loss": 108.0034, "step": 4971, "task_loss": 2.340672016143799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981168862512038, "compression/movement_sparsity/importance_threshold": -0.00013394772954594333, "compression/movement_sparsity/linear_layer_sparsity": 0.9104245199234716, "compression/movement_sparsity/model_sparsity": 0.8791486368706789, "compression_loss": 104.07293701171875, "distillation_loss": 3.747239589691162, "epoch": 4.2, "learning_rate": 3.220625528317836e-05, "loss": 109.3658, "step": 4972, "task_loss": 2.5439038276672363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981228580662474, "compression/movement_sparsity/importance_threshold": -0.000133522948479548, "compression/movement_sparsity/linear_layer_sparsity": 0.9104337373050542, "compression/movement_sparsity/model_sparsity": 0.8791575376068481, "compression_loss": 104.07865142822266, "distillation_loss": 4.323878288269043, "epoch": 4.2, "learning_rate": 3.220155912463605e-05, "loss": 108.9487, "step": 4973, "task_loss": 2.44897198677063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9812881724253372, "compression/movement_sparsity/importance_threshold": -0.0001330990664203651, "compression/movement_sparsity/linear_layer_sparsity": 0.9104053577860808, "compression/movement_sparsity/model_sparsity": 0.8791301330116571, "compression_loss": 104.08428192138672, "distillation_loss": 4.215033531188965, "epoch": 4.2, "learning_rate": 3.219686296609374e-05, "loss": 108.1894, "step": 4974, "task_loss": 2.5775959491729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9813476379345127, "compression/movement_sparsity/importance_threshold": -0.00013267608241605486, "compression/movement_sparsity/linear_layer_sparsity": 0.910400766981541, "compression/movement_sparsity/model_sparsity": 0.8791256999153763, "compression_loss": 104.08997344970703, "distillation_loss": 4.990140914916992, "epoch": 4.21, "learning_rate": 3.219216680755143e-05, "loss": 108.918, "step": 4975, "task_loss": 2.913695812225342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9814069773238858, "compression/movement_sparsity/importance_threshold": -0.00013225399551428183, "compression/movement_sparsity/linear_layer_sparsity": 0.9104503357464033, "compression/movement_sparsity/model_sparsity": 0.8791735658406741, "compression_loss": 104.09563446044922, "distillation_loss": 4.103178977966309, "epoch": 4.21, "learning_rate": 3.2187470649009114e-05, "loss": 108.3962, "step": 4976, "task_loss": 2.569822072982788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9814661907273416, "compression/movement_sparsity/importance_threshold": -0.0001318328047627054, "compression/movement_sparsity/linear_layer_sparsity": 0.9105818116187563, "compression/movement_sparsity/model_sparsity": 0.8793005251123447, "compression_loss": 104.10128021240234, "distillation_loss": 3.1801366806030273, "epoch": 4.21, "learning_rate": 3.21827744904668e-05, "loss": 107.7902, "step": 4977, "task_loss": 1.6586003303527832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9815252782787652, "compression/movement_sparsity/importance_threshold": -0.00013141250920898837, "compression/movement_sparsity/linear_layer_sparsity": 0.9105830517321905, "compression/movement_sparsity/model_sparsity": 0.8793017226240674, "compression_loss": 104.10690307617188, "distillation_loss": 3.377258777618408, "epoch": 4.21, "learning_rate": 3.2178078331924486e-05, "loss": 108.4796, "step": 4978, "task_loss": 1.2192344665527344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9815842401120418, "compression/movement_sparsity/importance_threshold": -0.00013099310790079272, "compression/movement_sparsity/linear_layer_sparsity": 0.9105589172168954, "compression/movement_sparsity/model_sparsity": 0.8792784172036192, "compression_loss": 104.11251831054688, "distillation_loss": 3.6899490356445312, "epoch": 4.21, "learning_rate": 3.217338217338218e-05, "loss": 108.0114, "step": 4979, "task_loss": 2.0587382316589355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9816430763610565, "compression/movement_sparsity/importance_threshold": -0.00013057459988577956, "compression/movement_sparsity/linear_layer_sparsity": 0.9105548510757316, "compression/movement_sparsity/model_sparsity": 0.8792744907469133, "compression_loss": 104.11808776855469, "distillation_loss": 6.506047248840332, "epoch": 4.21, "learning_rate": 3.216868601483986e-05, "loss": 109.0207, "step": 4980, "task_loss": 2.9126169681549072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9817017871596944, "compression/movement_sparsity/importance_threshold": -0.00013015698421161258, "compression/movement_sparsity/linear_layer_sparsity": 0.9105650700873955, "compression/movement_sparsity/model_sparsity": 0.8792843587040892, "compression_loss": 104.1236343383789, "distillation_loss": 7.360722064971924, "epoch": 4.21, "learning_rate": 3.216398985629755e-05, "loss": 109.5575, "step": 4981, "task_loss": 3.3783562183380127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9817603726418407, "compression/movement_sparsity/importance_threshold": -0.000129740259925952, "compression/movement_sparsity/linear_layer_sparsity": 0.9105949758998263, "compression/movement_sparsity/model_sparsity": 0.8793132371598619, "compression_loss": 104.12920379638672, "distillation_loss": 4.282609462738037, "epoch": 4.21, "learning_rate": 3.215929369775524e-05, "loss": 107.7004, "step": 4982, "task_loss": 2.8001925945281982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9818188329413805, "compression/movement_sparsity/importance_threshold": -0.0001293244260764598, "compression/movement_sparsity/linear_layer_sparsity": 0.9106590086800309, "compression/movement_sparsity/model_sparsity": 0.8793750702170786, "compression_loss": 104.1347427368164, "distillation_loss": 5.650304794311523, "epoch": 4.21, "learning_rate": 3.2154597539212925e-05, "loss": 108.8648, "step": 4983, "task_loss": 2.979151964187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981877168192199, "compression/movement_sparsity/importance_threshold": -0.00012890948171079882, "compression/movement_sparsity/linear_layer_sparsity": 0.9107323780834944, "compression/movement_sparsity/model_sparsity": 0.8794459191558224, "compression_loss": 104.14026641845703, "distillation_loss": 4.048472881317139, "epoch": 4.21, "learning_rate": 3.214990138067061e-05, "loss": 108.7979, "step": 4984, "task_loss": 3.037482976913452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9819353785281812, "compression/movement_sparsity/importance_threshold": -0.00012849542587662927, "compression/movement_sparsity/linear_layer_sparsity": 0.9107875869796485, "compression/movement_sparsity/model_sparsity": 0.8794992314565512, "compression_loss": 104.14582824707031, "distillation_loss": 4.102701187133789, "epoch": 4.21, "learning_rate": 3.21452052221283e-05, "loss": 108.4684, "step": 4985, "task_loss": 3.298994541168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9819934640832124, "compression/movement_sparsity/importance_threshold": -0.00012808225762161486, "compression/movement_sparsity/linear_layer_sparsity": 0.9108333161625319, "compression/movement_sparsity/model_sparsity": 0.8795433897013232, "compression_loss": 104.15140533447266, "distillation_loss": 4.177999019622803, "epoch": 4.21, "learning_rate": 3.214050906358599e-05, "loss": 108.7291, "step": 4986, "task_loss": 2.226282835006714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9820514249911777, "compression/movement_sparsity/importance_threshold": -0.00012766997599341582, "compression/movement_sparsity/linear_layer_sparsity": 0.9108468620169663, "compression/movement_sparsity/model_sparsity": 0.8795564702139858, "compression_loss": 104.15691375732422, "distillation_loss": 5.531336784362793, "epoch": 4.22, "learning_rate": 3.213581290504368e-05, "loss": 108.8494, "step": 4987, "task_loss": 2.5026793479919434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9821092613859622, "compression/movement_sparsity/importance_threshold": -0.00012725858003969498, "compression/movement_sparsity/linear_layer_sparsity": 0.91090050884716, "compression/movement_sparsity/model_sparsity": 0.8796082741105254, "compression_loss": 104.1624755859375, "distillation_loss": 5.450996398925781, "epoch": 4.22, "learning_rate": 3.213111674650136e-05, "loss": 108.76, "step": 4988, "task_loss": 4.452157497406006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9821669734014511, "compression/movement_sparsity/importance_threshold": -0.00012684806880811344, "compression/movement_sparsity/linear_layer_sparsity": 0.9108361421902617, "compression/movement_sparsity/model_sparsity": 0.8795461186463065, "compression_loss": 104.16800689697266, "distillation_loss": 5.13172721862793, "epoch": 4.22, "learning_rate": 3.212642058795905e-05, "loss": 108.5273, "step": 4989, "task_loss": 3.3187296390533447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9822245611715296, "compression/movement_sparsity/importance_threshold": -0.00012643844134633403, "compression/movement_sparsity/linear_layer_sparsity": 0.910844644121786, "compression/movement_sparsity/model_sparsity": 0.879554328510328, "compression_loss": 104.17357635498047, "distillation_loss": 3.583031177520752, "epoch": 4.22, "learning_rate": 3.2121724429416736e-05, "loss": 108.5508, "step": 4990, "task_loss": 3.1856579780578613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9822820248300825, "compression/movement_sparsity/importance_threshold": -0.00012602969670201872, "compression/movement_sparsity/linear_layer_sparsity": 0.9108434159325195, "compression/movement_sparsity/model_sparsity": 0.8795531425131412, "compression_loss": 104.17901611328125, "distillation_loss": 4.185683250427246, "epoch": 4.22, "learning_rate": 3.211702827087443e-05, "loss": 108.7316, "step": 4991, "task_loss": 1.6830796003341675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9823393645109953, "compression/movement_sparsity/importance_threshold": -0.00012562183392282773, "compression/movement_sparsity/linear_layer_sparsity": 0.9108599070563599, "compression/movement_sparsity/model_sparsity": 0.879569067116145, "compression_loss": 104.18450927734375, "distillation_loss": 4.452105522155762, "epoch": 4.22, "learning_rate": 3.2112332112332115e-05, "loss": 108.1517, "step": 4992, "task_loss": 2.4102354049682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9823965803481531, "compression/movement_sparsity/importance_threshold": -0.0001252148520564239, "compression/movement_sparsity/linear_layer_sparsity": 0.9108509877789683, "compression/movement_sparsity/model_sparsity": 0.8795604542433707, "compression_loss": 104.18997955322266, "distillation_loss": 6.000765800476074, "epoch": 4.22, "learning_rate": 3.21076359537898e-05, "loss": 108.2364, "step": 4993, "task_loss": 3.6559572219848633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9824536724754409, "compression/movement_sparsity/importance_threshold": -0.00012480875015047008, "compression/movement_sparsity/linear_layer_sparsity": 0.9108308597839989, "compression/movement_sparsity/model_sparsity": 0.8795410177069495, "compression_loss": 104.1954345703125, "distillation_loss": 5.483771324157715, "epoch": 4.22, "learning_rate": 3.210293979524749e-05, "loss": 108.8803, "step": 4994, "task_loss": 3.355335235595703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9825106410267439, "compression/movement_sparsity/importance_threshold": -0.00012440352725262647, "compression/movement_sparsity/linear_layer_sparsity": 0.9109159625684161, "compression/movement_sparsity/model_sparsity": 0.8796231969489151, "compression_loss": 104.20084381103516, "distillation_loss": 5.826418876647949, "epoch": 4.22, "learning_rate": 3.2098243636705174e-05, "loss": 108.5325, "step": 4995, "task_loss": 3.1432735919952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9825674861359474, "compression/movement_sparsity/importance_threshold": -0.00012399918241055505, "compression/movement_sparsity/linear_layer_sparsity": 0.9109646847173762, "compression/movement_sparsity/model_sparsity": 0.8796702453421716, "compression_loss": 104.20623779296875, "distillation_loss": 3.5400075912475586, "epoch": 4.22, "learning_rate": 3.209354747816287e-05, "loss": 108.1154, "step": 4996, "task_loss": 1.2153668403625488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9826242079369362, "compression/movement_sparsity/importance_threshold": -0.00012359571467191952, "compression/movement_sparsity/linear_layer_sparsity": 0.9110272150524588, "compression/movement_sparsity/model_sparsity": 0.8797306275678782, "compression_loss": 104.21159362792969, "distillation_loss": 4.284142971038818, "epoch": 4.22, "learning_rate": 3.208885131962055e-05, "loss": 108.2978, "step": 4997, "task_loss": 2.965482711791992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9826808065635957, "compression/movement_sparsity/importance_threshold": -0.00012319312308437923, "compression/movement_sparsity/linear_layer_sparsity": 0.911115227333779, "compression/movement_sparsity/model_sparsity": 0.8798156163565777, "compression_loss": 104.21698760986328, "distillation_loss": 5.611866474151611, "epoch": 4.22, "learning_rate": 3.208415516107824e-05, "loss": 108.8044, "step": 4998, "task_loss": 1.9454728364944458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9827372821498109, "compression/movement_sparsity/importance_threshold": -0.0001227914066955979, "compression/movement_sparsity/linear_layer_sparsity": 0.9111046029004155, "compression/movement_sparsity/model_sparsity": 0.8798053569051848, "compression_loss": 104.22232055664062, "distillation_loss": 4.342247009277344, "epoch": 4.23, "learning_rate": 3.2079459002535926e-05, "loss": 108.8126, "step": 4999, "task_loss": 2.2618579864501953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9827936348294671, "compression/movement_sparsity/importance_threshold": -0.00012239056455323662, "compression/movement_sparsity/linear_layer_sparsity": 0.9111689099364757, "compression/movement_sparsity/model_sparsity": 0.8798674547967248, "compression_loss": 104.22766876220703, "distillation_loss": 4.341944217681885, "epoch": 4.23, "learning_rate": 3.207476284399362e-05, "loss": 108.0993, "step": 5000, "task_loss": 2.4159770011901855 }, { "epoch": 4.23, "eval_accuracy": 0.5490297029702971, "eval_loss": 108.64608001708984, "eval_runtime": 226.7664, "eval_samples_per_second": 111.348, "eval_steps_per_second": 0.873, "step": 5000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9828498647364494, "compression/movement_sparsity/importance_threshold": -0.00012199059570495736, "compression/movement_sparsity/linear_layer_sparsity": 0.9111523949643, "compression/movement_sparsity/model_sparsity": 0.8798515071646493, "compression_loss": 104.23291778564453, "distillation_loss": 4.269426345825195, "epoch": 4.23, "learning_rate": 3.20700666854513e-05, "loss": 109.0562, "step": 5001, "task_loss": 1.8193352222442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9829059720046428, "compression/movement_sparsity/importance_threshold": -0.00012159149919842207, "compression/movement_sparsity/linear_layer_sparsity": 0.911143094113544, "compression/movement_sparsity/model_sparsity": 0.8798425258267296, "compression_loss": 104.23820495605469, "distillation_loss": 5.251678943634033, "epoch": 4.23, "learning_rate": 3.2065370526908985e-05, "loss": 108.3553, "step": 5002, "task_loss": 2.8533501625061035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9829619567679325, "compression/movement_sparsity/importance_threshold": -0.00012119327408129273, "compression/movement_sparsity/linear_layer_sparsity": 0.9111616838908884, "compression/movement_sparsity/model_sparsity": 0.8798604769880333, "compression_loss": 104.24346160888672, "distillation_loss": 5.447854042053223, "epoch": 4.23, "learning_rate": 3.206067436836668e-05, "loss": 109.0987, "step": 5003, "task_loss": 2.2596349716186523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9830178191602038, "compression/movement_sparsity/importance_threshold": -0.00012079591940122956, "compression/movement_sparsity/linear_layer_sparsity": 0.9112263725003129, "compression/movement_sparsity/model_sparsity": 0.8799229433447187, "compression_loss": 104.24871826171875, "distillation_loss": 4.070091247558594, "epoch": 4.23, "learning_rate": 3.2055978209824365e-05, "loss": 108.7603, "step": 5004, "task_loss": 2.334794521331787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9830735593153417, "compression/movement_sparsity/importance_threshold": -0.00012039943420589713, "compression/movement_sparsity/linear_layer_sparsity": 0.9113039034382813, "compression/movement_sparsity/model_sparsity": 0.8799978108564548, "compression_loss": 104.25397491455078, "distillation_loss": 5.786252975463867, "epoch": 4.23, "learning_rate": 3.205128205128206e-05, "loss": 109.5852, "step": 5005, "task_loss": 2.44339919090271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9831291773672313, "compression/movement_sparsity/importance_threshold": -0.00012000381754295568, "compression/movement_sparsity/linear_layer_sparsity": 0.91122633672781, "compression/movement_sparsity/model_sparsity": 0.8799229088011112, "compression_loss": 104.25923156738281, "distillation_loss": 4.669011116027832, "epoch": 4.23, "learning_rate": 3.204658589273974e-05, "loss": 108.4856, "step": 5006, "task_loss": 3.0763187408447266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9831846734497577, "compression/movement_sparsity/importance_threshold": -0.00011960906846006716, "compression/movement_sparsity/linear_layer_sparsity": 0.9112521763990769, "compression/movement_sparsity/model_sparsity": 0.8799478608001781, "compression_loss": 104.26447296142578, "distillation_loss": 4.469457149505615, "epoch": 4.23, "learning_rate": 3.204188973419743e-05, "loss": 108.5556, "step": 5007, "task_loss": 1.889810562133789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9832400476968063, "compression/movement_sparsity/importance_threshold": -0.00011921518600489355, "compression/movement_sparsity/linear_layer_sparsity": 0.9112433048183558, "compression/movement_sparsity/model_sparsity": 0.8799392939855469, "compression_loss": 104.26966857910156, "distillation_loss": 4.056338787078857, "epoch": 4.23, "learning_rate": 3.203719357565512e-05, "loss": 108.5541, "step": 5008, "task_loss": 1.997098684310913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.983295300242262, "compression/movement_sparsity/importance_threshold": -0.00011882216922509681, "compression/movement_sparsity/linear_layer_sparsity": 0.9111815137816668, "compression/movement_sparsity/model_sparsity": 0.8798796256610596, "compression_loss": 104.2748794555664, "distillation_loss": 5.917267799377441, "epoch": 4.23, "learning_rate": 3.2032497417112803e-05, "loss": 108.835, "step": 5009, "task_loss": 2.6058504581451416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.98335043122001, "compression/movement_sparsity/importance_threshold": -0.0001184300171683389, "compression/movement_sparsity/linear_layer_sparsity": 0.9112346240243169, "compression/movement_sparsity/model_sparsity": 0.8799309114034884, "compression_loss": 104.28009796142578, "distillation_loss": 5.120620250701904, "epoch": 4.23, "learning_rate": 3.202780125857049e-05, "loss": 109.0104, "step": 5010, "task_loss": 2.774747371673584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9834054407639355, "compression/movement_sparsity/importance_threshold": -0.00011803872888228092, "compression/movement_sparsity/linear_layer_sparsity": 0.9113182839844501, "compression/movement_sparsity/model_sparsity": 0.880011697386623, "compression_loss": 104.2852554321289, "distillation_loss": 3.936546802520752, "epoch": 4.24, "learning_rate": 3.2023105100028176e-05, "loss": 108.1228, "step": 5011, "task_loss": 2.667952299118042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9834603290079236, "compression/movement_sparsity/importance_threshold": -0.00011764830341458657, "compression/movement_sparsity/linear_layer_sparsity": 0.9114018485512423, "compression/movement_sparsity/model_sparsity": 0.880092391253471, "compression_loss": 104.2904052734375, "distillation_loss": 5.0171356201171875, "epoch": 4.24, "learning_rate": 3.201840894148587e-05, "loss": 108.531, "step": 5012, "task_loss": 2.020416021347046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9835150960858594, "compression/movement_sparsity/importance_threshold": -0.00011725873981291608, "compression/movement_sparsity/linear_layer_sparsity": 0.9114716884010855, "compression/movement_sparsity/model_sparsity": 0.8801598318896198, "compression_loss": 104.2955322265625, "distillation_loss": 4.268305778503418, "epoch": 4.24, "learning_rate": 3.2013712782943556e-05, "loss": 108.4724, "step": 5013, "task_loss": 2.081284999847412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9835697421316282, "compression/movement_sparsity/importance_threshold": -0.00011687003712493056, "compression/movement_sparsity/linear_layer_sparsity": 0.9115286859223849, "compression/movement_sparsity/model_sparsity": 0.8802148713707176, "compression_loss": 104.30072021484375, "distillation_loss": 5.011181831359863, "epoch": 4.24, "learning_rate": 3.200901662440124e-05, "loss": 108.2344, "step": 5014, "task_loss": 3.0802648067474365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9836242672791149, "compression/movement_sparsity/importance_threshold": -0.00011648219439829369, "compression/movement_sparsity/linear_layer_sparsity": 0.9116050005952544, "compression/movement_sparsity/model_sparsity": 0.8802885643998027, "compression_loss": 104.30586242675781, "distillation_loss": 4.295495986938477, "epoch": 4.24, "learning_rate": 3.200432046585893e-05, "loss": 108.2117, "step": 5015, "task_loss": 2.511744737625122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9836786716622047, "compression/movement_sparsity/importance_threshold": -0.00011609521068066744, "compression/movement_sparsity/linear_layer_sparsity": 0.9115718156367238, "compression/movement_sparsity/model_sparsity": 0.8802565194466865, "compression_loss": 104.31096649169922, "distillation_loss": 5.037894248962402, "epoch": 4.24, "learning_rate": 3.1999624307316615e-05, "loss": 108.5593, "step": 5016, "task_loss": 2.7582225799560547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9837329554147829, "compression/movement_sparsity/importance_threshold": -0.00011570908501971205, "compression/movement_sparsity/linear_layer_sparsity": 0.9115795186490167, "compression/movement_sparsity/model_sparsity": 0.8802639578368098, "compression_loss": 104.31607055664062, "distillation_loss": 5.043911933898926, "epoch": 4.24, "learning_rate": 3.199492814877431e-05, "loss": 108.9001, "step": 5017, "task_loss": 2.1440203189849854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9837871186707345, "compression/movement_sparsity/importance_threshold": -0.00011532381646308949, "compression/movement_sparsity/linear_layer_sparsity": 0.9115940661335323, "compression/movement_sparsity/model_sparsity": 0.8802780055704791, "compression_loss": 104.32118225097656, "distillation_loss": 4.4111433029174805, "epoch": 4.24, "learning_rate": 3.199023199023199e-05, "loss": 108.7346, "step": 5018, "task_loss": 3.195502996444702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9838411615639447, "compression/movement_sparsity/importance_threshold": -0.00011493940405846344, "compression/movement_sparsity/linear_layer_sparsity": 0.9116254505427499, "compression/movement_sparsity/model_sparsity": 0.8803083118286903, "compression_loss": 104.32625579833984, "distillation_loss": 4.927526950836182, "epoch": 4.24, "learning_rate": 3.198553583168968e-05, "loss": 109.2043, "step": 5019, "task_loss": 4.000336647033691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9838950842282987, "compression/movement_sparsity/importance_threshold": -0.00011455584685349327, "compression/movement_sparsity/linear_layer_sparsity": 0.9116560956535741, "compression/movement_sparsity/model_sparsity": 0.8803379041856823, "compression_loss": 104.33131408691406, "distillation_loss": 5.458263874053955, "epoch": 4.24, "learning_rate": 3.198083967314737e-05, "loss": 108.621, "step": 5020, "task_loss": 3.0760648250579834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9839488867976816, "compression/movement_sparsity/importance_threshold": -0.00011417314389584269, "compression/movement_sparsity/linear_layer_sparsity": 0.9117428558972926, "compression/movement_sparsity/model_sparsity": 0.8804216839481234, "compression_loss": 104.33642578125, "distillation_loss": 4.128429889678955, "epoch": 4.24, "learning_rate": 3.197614351460505e-05, "loss": 108.4435, "step": 5021, "task_loss": 1.8855867385864258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9840025694059783, "compression/movement_sparsity/importance_threshold": -0.00011379129423317366, "compression/movement_sparsity/linear_layer_sparsity": 0.9117528006531009, "compression/movement_sparsity/model_sparsity": 0.8804312870709761, "compression_loss": 104.34141540527344, "distillation_loss": 4.425334930419922, "epoch": 4.24, "learning_rate": 3.1971447356062746e-05, "loss": 108.5857, "step": 5022, "task_loss": 2.356952428817749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9840561321870743, "compression/movement_sparsity/importance_threshold": -0.0001134102969131464, "compression/movement_sparsity/linear_layer_sparsity": 0.911843376630463, "compression/movement_sparsity/model_sparsity": 0.8805187514848714, "compression_loss": 104.34645080566406, "distillation_loss": 4.185605049133301, "epoch": 4.25, "learning_rate": 3.1966751197520426e-05, "loss": 108.2858, "step": 5023, "task_loss": 1.741166353225708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9841095752748545, "compression/movement_sparsity/importance_threshold": -0.00011303015098342462, "compression/movement_sparsity/linear_layer_sparsity": 0.911818586285948, "compression/movement_sparsity/model_sparsity": 0.8804948127649546, "compression_loss": 104.35147857666016, "distillation_loss": 4.892793655395508, "epoch": 4.25, "learning_rate": 3.196205503897812e-05, "loss": 108.7029, "step": 5024, "task_loss": 2.682919979095459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9841628988032042, "compression/movement_sparsity/importance_threshold": -0.00011265085549166855, "compression/movement_sparsity/linear_layer_sparsity": 0.9118341354005451, "compression/movement_sparsity/model_sparsity": 0.8805098277196306, "compression_loss": 104.35649871826172, "distillation_loss": 4.861721038818359, "epoch": 4.25, "learning_rate": 3.1957358880435805e-05, "loss": 108.9899, "step": 5025, "task_loss": 3.140672206878662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9842161029060085, "compression/movement_sparsity/importance_threshold": -0.00011227240948554103, "compression/movement_sparsity/linear_layer_sparsity": 0.9118942809021005, "compression/movement_sparsity/model_sparsity": 0.8805679070381782, "compression_loss": 104.3614730834961, "distillation_loss": 4.27801513671875, "epoch": 4.25, "learning_rate": 3.195266272189349e-05, "loss": 108.6148, "step": 5026, "task_loss": 2.72994327545166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9842691877171524, "compression/movement_sparsity/importance_threshold": -0.00011189481201270314, "compression/movement_sparsity/linear_layer_sparsity": 0.9118615490619401, "compression/movement_sparsity/model_sparsity": 0.8805362996374222, "compression_loss": 104.36637878417969, "distillation_loss": 3.854684352874756, "epoch": 4.25, "learning_rate": 3.194796656335118e-05, "loss": 107.89, "step": 5027, "task_loss": 2.6837313175201416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9843221533705212, "compression/movement_sparsity/importance_threshold": -0.00011151806212081859, "compression/movement_sparsity/linear_layer_sparsity": 0.9118902505334395, "compression/movement_sparsity/model_sparsity": 0.8805640151250796, "compression_loss": 104.37135314941406, "distillation_loss": 5.958911895751953, "epoch": 4.25, "learning_rate": 3.1943270404808864e-05, "loss": 108.8032, "step": 5028, "task_loss": 3.183894395828247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984375, "compression/movement_sparsity/importance_threshold": -0.00011114215885754675, "compression/movement_sparsity/linear_layer_sparsity": 0.9119588264215134, "compression/movement_sparsity/model_sparsity": 0.8806302352204342, "compression_loss": 104.37630462646484, "distillation_loss": 5.923521041870117, "epoch": 4.25, "learning_rate": 3.193857424626656e-05, "loss": 108.8359, "step": 5029, "task_loss": 3.2932369709014893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984427727739474, "compression/movement_sparsity/importance_threshold": -0.00011076710127055044, "compression/movement_sparsity/linear_layer_sparsity": 0.9120086575180637, "compression/movement_sparsity/model_sparsity": 0.8806783544655195, "compression_loss": 104.38121032714844, "distillation_loss": 4.6384172439575195, "epoch": 4.25, "learning_rate": 3.1933878087724244e-05, "loss": 108.7693, "step": 5030, "task_loss": 2.203394889831543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9844803367228281, "compression/movement_sparsity/importance_threshold": -0.00011039288840749249, "compression/movement_sparsity/linear_layer_sparsity": 0.9120035658984832, "compression/movement_sparsity/model_sparsity": 0.8806734377587352, "compression_loss": 104.38616943359375, "distillation_loss": 4.451485633850098, "epoch": 4.25, "learning_rate": 3.192918192918193e-05, "loss": 108.4133, "step": 5031, "task_loss": 2.116568088531494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9845328270839477, "compression/movement_sparsity/importance_threshold": -0.00011001951931603401, "compression/movement_sparsity/linear_layer_sparsity": 0.9120797613296764, "compression/movement_sparsity/model_sparsity": 0.8807470156424624, "compression_loss": 104.39105987548828, "distillation_loss": 5.354344367980957, "epoch": 4.25, "learning_rate": 3.1924485770639616e-05, "loss": 109.062, "step": 5032, "task_loss": 2.773813247680664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9845851989567178, "compression/movement_sparsity/importance_threshold": -0.00010964699304383697, "compression/movement_sparsity/linear_layer_sparsity": 0.9121260271001035, "compression/movement_sparsity/model_sparsity": 0.8807916920413451, "compression_loss": 104.39598846435547, "distillation_loss": 3.5296123027801514, "epoch": 4.25, "learning_rate": 3.191978961209731e-05, "loss": 107.9402, "step": 5033, "task_loss": 2.6511311531066895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9846374524750237, "compression/movement_sparsity/importance_threshold": -0.00010927530863856245, "compression/movement_sparsity/linear_layer_sparsity": 0.9121005093813628, "compression/movement_sparsity/model_sparsity": 0.8807670509347448, "compression_loss": 104.40081787109375, "distillation_loss": 5.767242431640625, "epoch": 4.26, "learning_rate": 3.1915093453554996e-05, "loss": 108.8393, "step": 5034, "task_loss": 2.8556175231933594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9846895877727504, "compression/movement_sparsity/importance_threshold": -0.00010890446514787242, "compression/movement_sparsity/linear_layer_sparsity": 0.9121157365434337, "compression/movement_sparsity/model_sparsity": 0.8807817549969544, "compression_loss": 104.40567016601562, "distillation_loss": 4.7944746017456055, "epoch": 4.26, "learning_rate": 3.1910397295012675e-05, "loss": 108.2426, "step": 5035, "task_loss": 2.214909315109253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984741604983783, "compression/movement_sparsity/importance_threshold": -0.00010853446161942972, "compression/movement_sparsity/linear_layer_sparsity": 0.91214611932257, "compression/movement_sparsity/model_sparsity": 0.8808110940341589, "compression_loss": 104.41057586669922, "distillation_loss": 4.036089897155762, "epoch": 4.26, "learning_rate": 3.190570113647037e-05, "loss": 109.0691, "step": 5036, "task_loss": 2.775299549102783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9847935042420068, "compression/movement_sparsity/importance_threshold": -0.00010816529710089545, "compression/movement_sparsity/linear_layer_sparsity": 0.9122500622918517, "compression/movement_sparsity/model_sparsity": 0.88091146624268, "compression_loss": 104.41539001464844, "distillation_loss": 4.319891452789307, "epoch": 4.26, "learning_rate": 3.1901004977928055e-05, "loss": 108.7449, "step": 5037, "task_loss": 2.226421594619751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9848452856813068, "compression/movement_sparsity/importance_threshold": -0.00010779697063993243, "compression/movement_sparsity/linear_layer_sparsity": 0.9123482220398301, "compression/movement_sparsity/model_sparsity": 0.8810062539013406, "compression_loss": 104.42024230957031, "distillation_loss": 5.4547600746154785, "epoch": 4.26, "learning_rate": 3.189630881938575e-05, "loss": 109.565, "step": 5038, "task_loss": 2.645890712738037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9848969494355683, "compression/movement_sparsity/importance_threshold": -0.00010742948128420089, "compression/movement_sparsity/linear_layer_sparsity": 0.9123675391914002, "compression/movement_sparsity/model_sparsity": 0.8810249074493278, "compression_loss": 104.425048828125, "distillation_loss": 5.271458148956299, "epoch": 4.26, "learning_rate": 3.1891612660843434e-05, "loss": 108.5149, "step": 5039, "task_loss": 2.631908655166626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9849484956386761, "compression/movement_sparsity/importance_threshold": -0.00010706282808136368, "compression/movement_sparsity/linear_layer_sparsity": 0.9123967414779405, "compression/movement_sparsity/model_sparsity": 0.8810531065474886, "compression_loss": 104.429931640625, "distillation_loss": 4.879181385040283, "epoch": 4.26, "learning_rate": 3.188691650230112e-05, "loss": 109.0961, "step": 5040, "task_loss": 1.6607111692428589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9849999244245158, "compression/movement_sparsity/importance_threshold": -0.00010669701007908275, "compression/movement_sparsity/linear_layer_sparsity": 0.9124136737959834, "compression/movement_sparsity/model_sparsity": 0.8810694571883169, "compression_loss": 104.43474578857422, "distillation_loss": 5.471648216247559, "epoch": 4.26, "learning_rate": 3.188222034375881e-05, "loss": 109.0103, "step": 5041, "task_loss": 2.613938093185425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9850512359269723, "compression/movement_sparsity/importance_threshold": -0.00010633202632502007, "compression/movement_sparsity/linear_layer_sparsity": 0.9124778496661996, "compression/movement_sparsity/model_sparsity": 0.8811314284199631, "compression_loss": 104.43955993652344, "distillation_loss": 4.891733169555664, "epoch": 4.26, "learning_rate": 3.187752418521649e-05, "loss": 108.5138, "step": 5042, "task_loss": 1.684827208518982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851024302799307, "compression/movement_sparsity/importance_threshold": -0.00010596787586683674, "compression/movement_sparsity/linear_layer_sparsity": 0.9124658658777256, "compression/movement_sparsity/model_sparsity": 0.8811198563114896, "compression_loss": 104.4443130493164, "distillation_loss": 3.5399320125579834, "epoch": 4.26, "learning_rate": 3.1872828026674186e-05, "loss": 108.4008, "step": 5043, "task_loss": 1.3717087507247925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851535076172762, "compression/movement_sparsity/importance_threshold": -0.00010560455775219559, "compression/movement_sparsity/linear_layer_sparsity": 0.9125140037424716, "compression/movement_sparsity/model_sparsity": 0.8811663404924921, "compression_loss": 104.4491195678711, "distillation_loss": 4.994228363037109, "epoch": 4.26, "learning_rate": 3.1868131868131866e-05, "loss": 108.6111, "step": 5044, "task_loss": 1.8546278476715088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.985204468072894, "compression/movement_sparsity/importance_threshold": -0.00010524207102875684, "compression/movement_sparsity/linear_layer_sparsity": 0.912537303566032, "compression/movement_sparsity/model_sparsity": 0.8811888398954346, "compression_loss": 104.4539566040039, "distillation_loss": 5.128544807434082, "epoch": 4.26, "learning_rate": 3.186343570958956e-05, "loss": 108.2555, "step": 5045, "task_loss": 2.7458300590515137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.985255311780669, "compression/movement_sparsity/importance_threshold": -0.00010488041474418421, "compression/movement_sparsity/linear_layer_sparsity": 0.9124577932162361, "compression/movement_sparsity/model_sparsity": 0.8811120609707567, "compression_loss": 104.45868682861328, "distillation_loss": 5.4424004554748535, "epoch": 4.27, "learning_rate": 3.1858739551047245e-05, "loss": 108.6822, "step": 5046, "task_loss": 2.9729886054992676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9853060388744866, "compression/movement_sparsity/importance_threshold": -0.00010451958794613879, "compression/movement_sparsity/linear_layer_sparsity": 0.9124809737981202, "compression/movement_sparsity/model_sparsity": 0.8811344452283413, "compression_loss": 104.4634780883789, "distillation_loss": 3.640532970428467, "epoch": 4.27, "learning_rate": 3.185404339250493e-05, "loss": 108.3545, "step": 5047, "task_loss": 1.6680494546890259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.985356649488232, "compression/movement_sparsity/importance_threshold": -0.00010415958968228254, "compression/movement_sparsity/linear_layer_sparsity": 0.9125606749345984, "compression/movement_sparsity/model_sparsity": 0.881211408385592, "compression_loss": 104.46820831298828, "distillation_loss": 5.305261135101318, "epoch": 4.27, "learning_rate": 3.184934723396262e-05, "loss": 108.6815, "step": 5048, "task_loss": 3.6468071937561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9854071437557902, "compression/movement_sparsity/importance_threshold": -0.0001038004190002757, "compression/movement_sparsity/linear_layer_sparsity": 0.9126065352833258, "compression/movement_sparsity/model_sparsity": 0.8812556932902578, "compression_loss": 104.47296142578125, "distillation_loss": 4.624713897705078, "epoch": 4.27, "learning_rate": 3.1844651075420304e-05, "loss": 109.4092, "step": 5049, "task_loss": 1.870958685874939 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9854575218110462, "compression/movement_sparsity/importance_threshold": -0.00010344207494778369, "compression/movement_sparsity/linear_layer_sparsity": 0.9126401852843943, "compression/movement_sparsity/model_sparsity": 0.8812881873102699, "compression_loss": 104.47763061523438, "distillation_loss": 5.337649822235107, "epoch": 4.27, "learning_rate": 3.1839954916878e-05, "loss": 108.7671, "step": 5050, "task_loss": 1.7591063976287842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9855077837878853, "compression/movement_sparsity/importance_threshold": -0.00010308455657246501, "compression/movement_sparsity/linear_layer_sparsity": 0.9126445018330784, "compression/movement_sparsity/model_sparsity": 0.8812923555722276, "compression_loss": 104.48233032226562, "distillation_loss": 3.8554539680480957, "epoch": 4.27, "learning_rate": 3.1835258758335684e-05, "loss": 108.5898, "step": 5051, "task_loss": 2.1395936012268066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9855579298201926, "compression/movement_sparsity/importance_threshold": -0.00010272786292198337, "compression/movement_sparsity/linear_layer_sparsity": 0.9126999730609204, "compression/movement_sparsity/model_sparsity": 0.8813459211927438, "compression_loss": 104.48701477050781, "distillation_loss": 6.024517059326172, "epoch": 4.27, "learning_rate": 3.183056259979337e-05, "loss": 108.9136, "step": 5052, "task_loss": 3.384887218475342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9856079600418534, "compression/movement_sparsity/importance_threshold": -0.00010237199304399987, "compression/movement_sparsity/linear_layer_sparsity": 0.9127434366519532, "compression/movement_sparsity/model_sparsity": 0.8813878916757149, "compression_loss": 104.49174499511719, "distillation_loss": 6.46482515335083, "epoch": 4.27, "learning_rate": 3.1825866441251057e-05, "loss": 109.5645, "step": 5053, "task_loss": 2.8559885025024414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9856578745867526, "compression/movement_sparsity/importance_threshold": -0.00010201694598617646, "compression/movement_sparsity/linear_layer_sparsity": 0.9127530594552353, "compression/movement_sparsity/model_sparsity": 0.8813971839061011, "compression_loss": 104.4963607788086, "distillation_loss": 3.532233953475952, "epoch": 4.27, "learning_rate": 3.182117028270874e-05, "loss": 108.7952, "step": 5054, "task_loss": 2.0725934505462646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9857076735887755, "compression/movement_sparsity/importance_threshold": -0.00010166272079617424, "compression/movement_sparsity/linear_layer_sparsity": 0.9128494663505713, "compression/movement_sparsity/model_sparsity": 0.8814902789279999, "compression_loss": 104.50106048583984, "distillation_loss": 4.744317054748535, "epoch": 4.27, "learning_rate": 3.1816474124166436e-05, "loss": 109.2638, "step": 5055, "task_loss": 1.8506711721420288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9857573571818071, "compression/movement_sparsity/importance_threshold": -0.00010130931652165692, "compression/movement_sparsity/linear_layer_sparsity": 0.9128823412807434, "compression/movement_sparsity/model_sparsity": 0.8815220245031855, "compression_loss": 104.50565338134766, "distillation_loss": 5.229427814483643, "epoch": 4.27, "learning_rate": 3.181177796562412e-05, "loss": 108.7621, "step": 5056, "task_loss": 3.1347546577453613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9858069254997327, "compression/movement_sparsity/importance_threshold": -0.00010095673221028473, "compression/movement_sparsity/linear_layer_sparsity": 0.9129127479082149, "compression/movement_sparsity/model_sparsity": 0.8815513865694615, "compression_loss": 104.51033020019531, "distillation_loss": 4.544982433319092, "epoch": 4.27, "learning_rate": 3.180708180708181e-05, "loss": 108.6867, "step": 5057, "task_loss": 2.7318215370178223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9858563786764374, "compression/movement_sparsity/importance_threshold": -0.00010060496690972049, "compression/movement_sparsity/linear_layer_sparsity": 0.9129016107356429, "compression/movement_sparsity/model_sparsity": 0.8815406319930295, "compression_loss": 104.51494598388672, "distillation_loss": 4.373244285583496, "epoch": 4.28, "learning_rate": 3.1802385648539495e-05, "loss": 108.7917, "step": 5058, "task_loss": 1.2020103931427002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9859057168458063, "compression/movement_sparsity/importance_threshold": -0.00010025401966762531, "compression/movement_sparsity/linear_layer_sparsity": 0.9129527296422979, "compression/movement_sparsity/model_sparsity": 0.8815899948079806, "compression_loss": 104.51950073242188, "distillation_loss": 4.660158157348633, "epoch": 4.28, "learning_rate": 3.179768948999718e-05, "loss": 108.824, "step": 5059, "task_loss": 3.123424768447876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9859549401417245, "compression/movement_sparsity/importance_threshold": -9.990388953166202e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129903265428537, "compression/movement_sparsity/model_sparsity": 0.8816263001393408, "compression_loss": 104.52408599853516, "distillation_loss": 3.555788040161133, "epoch": 4.28, "learning_rate": 3.1792993331454874e-05, "loss": 108.8937, "step": 5060, "task_loss": 1.9908853769302368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9860040486980772, "compression/movement_sparsity/importance_threshold": -9.955457554949084e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129644272507487, "compression/movement_sparsity/model_sparsity": 0.8816012905675951, "compression_loss": 104.52864837646484, "distillation_loss": 5.032294273376465, "epoch": 4.28, "learning_rate": 3.1788297172912554e-05, "loss": 109.0549, "step": 5061, "task_loss": 2.507904291152954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9860530426487495, "compression/movement_sparsity/importance_threshold": -9.920607676877548e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129493908753599, "compression/movement_sparsity/model_sparsity": 0.8815867707379582, "compression_loss": 104.53324127197266, "distillation_loss": 3.639608383178711, "epoch": 4.28, "learning_rate": 3.178360101437025e-05, "loss": 108.9286, "step": 5062, "task_loss": 1.9891988039016724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861019221276266, "compression/movement_sparsity/importance_threshold": -9.885839223717618e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129467317859771, "compression/movement_sparsity/model_sparsity": 0.881584202996476, "compression_loss": 104.53776550292969, "distillation_loss": 3.753262758255005, "epoch": 4.28, "learning_rate": 3.1778904855827933e-05, "loss": 108.9333, "step": 5063, "task_loss": 2.1261942386627197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861506872685937, "compression/movement_sparsity/importance_threshold": -9.851152100235575e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129958116599662, "compression/movement_sparsity/model_sparsity": 0.8816315968258064, "compression_loss": 104.54228210449219, "distillation_loss": 5.539440631866455, "epoch": 4.28, "learning_rate": 3.1774208697285627e-05, "loss": 109.1773, "step": 5064, "task_loss": 3.702625274658203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861993382055357, "compression/movement_sparsity/importance_threshold": -9.816546211197617e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129901119078363, "compression/movement_sparsity/model_sparsity": 0.8816260928776966, "compression_loss": 104.54682159423828, "distillation_loss": 5.568105220794678, "epoch": 4.28, "learning_rate": 3.1769512538743306e-05, "loss": 109.3531, "step": 5065, "task_loss": 2.7501678466796875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9862478750723379, "compression/movement_sparsity/importance_threshold": -9.78202146136994e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.912988549841876, "compression/movement_sparsity/model_sparsity": 0.8816245844735074, "compression_loss": 104.55131530761719, "distillation_loss": 4.74800443649292, "epoch": 4.28, "learning_rate": 3.176481638020099e-05, "loss": 108.6853, "step": 5066, "task_loss": 3.6204872131347656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9862962980028855, "compression/movement_sparsity/importance_threshold": -9.747577755518568e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913016810119173, "compression/movement_sparsity/model_sparsity": 0.8816518739233405, "compression_loss": 104.55571746826172, "distillation_loss": 4.964789390563965, "epoch": 4.28, "learning_rate": 3.1760120221658686e-05, "loss": 108.6669, "step": 5067, "task_loss": 2.160592555999756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9863446071310636, "compression/movement_sparsity/importance_threshold": -9.71321499840987e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130049694207106, "compression/movement_sparsity/model_sparsity": 0.8816404399892965, "compression_loss": 104.56024932861328, "distillation_loss": 5.610226631164551, "epoch": 4.28, "learning_rate": 3.175542406311637e-05, "loss": 109.6891, "step": 5068, "task_loss": 3.4070065021514893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9863928025907573, "compression/movement_sparsity/importance_threshold": -9.678933094809869e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913019528829394, "compression/movement_sparsity/model_sparsity": 0.8816544992375017, "compression_loss": 104.56464385986328, "distillation_loss": 5.028804779052734, "epoch": 4.28, "learning_rate": 3.1750727904574065e-05, "loss": 109.0822, "step": 5069, "task_loss": 2.826892852783203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9864408845158518, "compression/movement_sparsity/importance_threshold": -9.644731949484935e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130211028195219, "compression/movement_sparsity/model_sparsity": 0.8816560191562266, "compression_loss": 104.56918334960938, "distillation_loss": 5.038647174835205, "epoch": 4.29, "learning_rate": 3.1746031746031745e-05, "loss": 108.6294, "step": 5070, "task_loss": 3.1581618785858154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986488853040232, "compression/movement_sparsity/importance_threshold": -9.610611467201179e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130553967256426, "compression/movement_sparsity/model_sparsity": 0.8816891349611716, "compression_loss": 104.57361602783203, "distillation_loss": 4.09964656829834, "epoch": 4.29, "learning_rate": 3.174133558748944e-05, "loss": 108.9277, "step": 5071, "task_loss": 2.61124324798584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9865367082977834, "compression/movement_sparsity/importance_threshold": -9.576571552724795e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130421370512316, "compression/movement_sparsity/model_sparsity": 0.8816763307973682, "compression_loss": 104.57799530029297, "distillation_loss": 7.052394866943359, "epoch": 4.29, "learning_rate": 3.1736639428947124e-05, "loss": 109.4853, "step": 5072, "task_loss": 3.662619113922119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986584450422391, "compression/movement_sparsity/importance_threshold": -9.542612110821896e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91298999266616, "compression/movement_sparsity/model_sparsity": 0.8816259777323386, "compression_loss": 104.5823745727539, "distillation_loss": 4.875102996826172, "epoch": 4.29, "learning_rate": 3.173194327040481e-05, "loss": 109.046, "step": 5073, "task_loss": 3.1285769939422607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9866320795479399, "compression/movement_sparsity/importance_threshold": -9.508733046258676e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129742527648806, "compression/movement_sparsity/model_sparsity": 0.8816107785450898, "compression_loss": 104.5868148803711, "distillation_loss": 6.343661308288574, "epoch": 4.29, "learning_rate": 3.17272471118625e-05, "loss": 109.883, "step": 5074, "task_loss": 4.520846366882324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9866795958083151, "compression/movement_sparsity/importance_threshold": -9.474934263801419e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.912953480864859, "compression/movement_sparsity/model_sparsity": 0.8815907202237357, "compression_loss": 104.59119415283203, "distillation_loss": 4.12447452545166, "epoch": 4.29, "learning_rate": 3.172255095332018e-05, "loss": 108.9937, "step": 5075, "task_loss": 2.0759224891662598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986726999337402, "compression/movement_sparsity/importance_threshold": -9.441215668216235e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.912921488323092, "compression/movement_sparsity/model_sparsity": 0.881559826724199, "compression_loss": 104.59551239013672, "distillation_loss": 5.408777236938477, "epoch": 4.29, "learning_rate": 3.1717854794777876e-05, "loss": 108.4068, "step": 5076, "task_loss": 1.8839396238327026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9867742902690857, "compression/movement_sparsity/importance_threshold": -9.40757716426932e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129914593387791, "compression/movement_sparsity/model_sparsity": 0.8816273940202414, "compression_loss": 104.59988403320312, "distillation_loss": 4.9225873947143555, "epoch": 4.29, "learning_rate": 3.171315863623556e-05, "loss": 108.8807, "step": 5077, "task_loss": 2.579627513885498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9868214687372513, "compression/movement_sparsity/importance_threshold": -9.374018656726785e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.912996837138383, "compression/movement_sparsity/model_sparsity": 0.8816325870758847, "compression_loss": 104.60425567626953, "distillation_loss": 4.354554176330566, "epoch": 4.29, "learning_rate": 3.170846247769325e-05, "loss": 109.441, "step": 5078, "task_loss": 2.039302110671997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9868685348757839, "compression/movement_sparsity/importance_threshold": -9.340540050354999e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130337066647131, "compression/movement_sparsity/model_sparsity": 0.8816681900205614, "compression_loss": 104.608642578125, "distillation_loss": 4.49071741104126, "epoch": 4.29, "learning_rate": 3.1703766319150935e-05, "loss": 109.56, "step": 5079, "task_loss": 2.848233222961426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9869154888185686, "compression/movement_sparsity/importance_threshold": -9.307141249919985e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913005708719104, "compression/movement_sparsity/model_sparsity": 0.8816411538905158, "compression_loss": 104.61298370361328, "distillation_loss": 3.8487954139709473, "epoch": 4.29, "learning_rate": 3.169907016060862e-05, "loss": 108.5518, "step": 5080, "task_loss": 1.4336045980453491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9869623306994906, "compression/movement_sparsity/importance_threshold": -9.273822160188027e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130636601738144, "compression/movement_sparsity/model_sparsity": 0.8816971145344773, "compression_loss": 104.61734771728516, "distillation_loss": 4.344496726989746, "epoch": 4.29, "learning_rate": 3.1694374002066315e-05, "loss": 109.0707, "step": 5081, "task_loss": 2.0669944286346436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9870090606524351, "compression/movement_sparsity/importance_threshold": -9.240582685925234e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130421489753991, "compression/movement_sparsity/model_sparsity": 0.881676342311904, "compression_loss": 104.62167358398438, "distillation_loss": 5.099299907684326, "epoch": 4.3, "learning_rate": 3.1689677843524e-05, "loss": 109.5054, "step": 5082, "task_loss": 2.585542917251587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9870556788112871, "compression/movement_sparsity/importance_threshold": -9.20742273189789e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130339809205686, "compression/movement_sparsity/model_sparsity": 0.8816684548548847, "compression_loss": 104.62596130371094, "distillation_loss": 5.2417192459106445, "epoch": 4.3, "learning_rate": 3.168498168498169e-05, "loss": 109.2237, "step": 5083, "task_loss": 2.803450584411621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871021853099319, "compression/movement_sparsity/importance_threshold": -9.174342202872016e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130143775889753, "compression/movement_sparsity/model_sparsity": 0.8816495249580384, "compression_loss": 104.63030242919922, "distillation_loss": 4.552940368652344, "epoch": 4.3, "learning_rate": 3.1680285526439374e-05, "loss": 108.6235, "step": 5084, "task_loss": 1.8778568506240845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871485802822545, "compression/movement_sparsity/importance_threshold": -9.141341003613985e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131409884009328, "compression/movement_sparsity/model_sparsity": 0.8817717862991049, "compression_loss": 104.63463592529297, "distillation_loss": 3.6431565284729004, "epoch": 4.3, "learning_rate": 3.167558936789706e-05, "loss": 108.522, "step": 5085, "task_loss": 1.0890514850616455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871948638621402, "compression/movement_sparsity/importance_threshold": -9.108419038889818e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132387069547088, "compression/movement_sparsity/model_sparsity": 0.8818661479199411, "compression_loss": 104.63884735107422, "distillation_loss": 4.197988986968994, "epoch": 4.3, "learning_rate": 3.167089320935475e-05, "loss": 108.8151, "step": 5086, "task_loss": 2.0392909049987793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9872410361834739, "compression/movement_sparsity/importance_threshold": -9.075576213465799e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132300023123346, "compression/movement_sparsity/model_sparsity": 0.8818577423088111, "compression_loss": 104.64313507080078, "distillation_loss": 4.143352031707764, "epoch": 4.3, "learning_rate": 3.166619705081243e-05, "loss": 108.8225, "step": 5087, "task_loss": 3.0179178714752197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.987287097380141, "compression/movement_sparsity/importance_threshold": -9.042812432108124e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131966981121277, "compression/movement_sparsity/model_sparsity": 0.881825582210337, "compression_loss": 104.6473159790039, "distillation_loss": 5.547877788543701, "epoch": 4.3, "learning_rate": 3.1661500892270126e-05, "loss": 108.7827, "step": 5088, "task_loss": 2.5743894577026367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9873330475860265, "compression/movement_sparsity/importance_threshold": -9.010127599582904e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132125811034186, "compression/movement_sparsity/model_sparsity": 0.8818409195720153, "compression_loss": 104.6515121459961, "distillation_loss": 5.565648078918457, "epoch": 4.3, "learning_rate": 3.165680473372781e-05, "loss": 108.1901, "step": 5089, "task_loss": 3.374450445175171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9873788869350155, "compression/movement_sparsity/importance_threshold": -8.977521620656421e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132448955977118, "compression/movement_sparsity/model_sparsity": 0.8818721239640185, "compression_loss": 104.65568542480469, "distillation_loss": 7.4147443771362305, "epoch": 4.3, "learning_rate": 3.16521085751855e-05, "loss": 109.3394, "step": 5090, "task_loss": 4.344175815582275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9874246155609934, "compression/movement_sparsity/importance_threshold": -8.944994400094698e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132241952426959, "compression/movement_sparsity/model_sparsity": 0.8818521347298792, "compression_loss": 104.65989685058594, "distillation_loss": 4.820096969604492, "epoch": 4.3, "learning_rate": 3.1647412416643185e-05, "loss": 109.2438, "step": 5091, "task_loss": 2.1813392639160156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9874702335978449, "compression/movement_sparsity/importance_threshold": -8.912545842664106e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9133263734351676, "compression/movement_sparsity/model_sparsity": 0.8819508027871026, "compression_loss": 104.66407012939453, "distillation_loss": 4.762113571166992, "epoch": 4.3, "learning_rate": 3.164271625810087e-05, "loss": 108.7876, "step": 5092, "task_loss": 2.0532028675079346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9875157411794555, "compression/movement_sparsity/importance_threshold": -8.880175853130754e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9133656993400308, "compression/movement_sparsity/model_sparsity": 0.881988777726153, "compression_loss": 104.66825103759766, "distillation_loss": 3.4940361976623535, "epoch": 4.3, "learning_rate": 3.1638020099558564e-05, "loss": 108.5962, "step": 5093, "task_loss": 2.4735631942749023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9875611384397103, "compression/movement_sparsity/importance_threshold": -8.847884336260839e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9134446611781154, "compression/movement_sparsity/model_sparsity": 0.8820650269821845, "compression_loss": 104.67240905761719, "distillation_loss": 4.030837059020996, "epoch": 4.31, "learning_rate": 3.163332394101625e-05, "loss": 108.5006, "step": 5094, "task_loss": 2.1113486289978027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876064255124943, "compression/movement_sparsity/importance_threshold": -8.81567119682047e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9134444942397685, "compression/movement_sparsity/model_sparsity": 0.8820648657786834, "compression_loss": 104.67648315429688, "distillation_loss": 4.683039665222168, "epoch": 4.31, "learning_rate": 3.162862778247394e-05, "loss": 108.6988, "step": 5095, "task_loss": 2.4487693309783936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876516025316927, "compression/movement_sparsity/importance_threshold": -8.783536339575931e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9134715859486372, "compression/movement_sparsity/model_sparsity": 0.8820910268040085, "compression_loss": 104.68070220947266, "distillation_loss": 4.7025017738342285, "epoch": 4.31, "learning_rate": 3.162393162393162e-05, "loss": 108.8665, "step": 5096, "task_loss": 3.12428617477417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876966696311907, "compression/movement_sparsity/importance_threshold": -8.751479669293332e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9135431667269552, "compression/movement_sparsity/model_sparsity": 0.8821601485623831, "compression_loss": 104.68484497070312, "distillation_loss": 5.85412073135376, "epoch": 4.31, "learning_rate": 3.161923546538931e-05, "loss": 109.4222, "step": 5097, "task_loss": 2.6708436012268066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9877416269448734, "compression/movement_sparsity/importance_threshold": -8.719501090738869e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9135882400806188, "compression/movement_sparsity/model_sparsity": 0.8822036735076865, "compression_loss": 104.68900299072266, "distillation_loss": 3.9842538833618164, "epoch": 4.31, "learning_rate": 3.1614539306847e-05, "loss": 109.3721, "step": 5098, "task_loss": 2.2735650539398193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9877864746066259, "compression/movement_sparsity/importance_threshold": -8.687600508678825e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9136680246862704, "compression/movement_sparsity/model_sparsity": 0.8822807172666878, "compression_loss": 104.69306945800781, "distillation_loss": 5.181363105773926, "epoch": 4.31, "learning_rate": 3.160984314830469e-05, "loss": 109.4889, "step": 5099, "task_loss": 3.260190963745117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9878312127503334, "compression/movement_sparsity/importance_threshold": -8.65577782787931e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137377333702696, "compression/movement_sparsity/model_sparsity": 0.8823480312429427, "compression_loss": 104.69722747802734, "distillation_loss": 5.3446831703186035, "epoch": 4.31, "learning_rate": 3.1605146989762375e-05, "loss": 108.6986, "step": 5100, "task_loss": 2.5290260314941406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.987875841509881, "compression/movement_sparsity/importance_threshold": -8.624032953106434e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137544629774628, "compression/movement_sparsity/model_sparsity": 0.8823641861366625, "compression_loss": 104.7012710571289, "distillation_loss": 5.792247772216797, "epoch": 4.31, "learning_rate": 3.160045083122006e-05, "loss": 109.5342, "step": 5101, "task_loss": 3.568880319595337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9879203610191538, "compression/movement_sparsity/importance_threshold": -8.592365789126567e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137089245812614, "compression/movement_sparsity/model_sparsity": 0.8823202121244631, "compression_loss": 104.70541381835938, "distillation_loss": 3.844280242919922, "epoch": 4.31, "learning_rate": 3.1595754672677755e-05, "loss": 109.5064, "step": 5102, "task_loss": 2.1342270374298096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9879647714120371, "compression/movement_sparsity/importance_threshold": -8.560776240705645e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137943850907077, "compression/movement_sparsity/model_sparsity": 0.8824027368025026, "compression_loss": 104.70945739746094, "distillation_loss": 3.9096224308013916, "epoch": 4.31, "learning_rate": 3.159105851413544e-05, "loss": 109.8586, "step": 5103, "task_loss": 3.0811352729797363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880090728224158, "compression/movement_sparsity/importance_threshold": -8.529264212610126e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138418909745689, "compression/movement_sparsity/model_sparsity": 0.882448610713108, "compression_loss": 104.71350860595703, "distillation_loss": 3.9818289279937744, "epoch": 4.31, "learning_rate": 3.158636235559313e-05, "loss": 109.1695, "step": 5104, "task_loss": 2.5189788341522217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880532653841753, "compression/movement_sparsity/importance_threshold": -8.497829609606031e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138735615638097, "compression/movement_sparsity/model_sparsity": 0.8824791933201783, "compression_loss": 104.71755981445312, "distillation_loss": 3.624044179916382, "epoch": 4.32, "learning_rate": 3.1581666197050814e-05, "loss": 108.1952, "step": 5105, "task_loss": 2.5003573894500732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880973492312005, "compression/movement_sparsity/importance_threshold": -8.466472336459644e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139271130006624, "compression/movement_sparsity/model_sparsity": 0.8825309051004315, "compression_loss": 104.72160339355469, "distillation_loss": 4.248787879943848, "epoch": 4.32, "learning_rate": 3.15769700385085e-05, "loss": 108.4107, "step": 5106, "task_loss": 2.456002712249756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9881413244973768, "compression/movement_sparsity/importance_threshold": -8.435192297936902e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138580720700508, "compression/movement_sparsity/model_sparsity": 0.8824642359381811, "compression_loss": 104.7256088256836, "distillation_loss": 4.179574012756348, "epoch": 4.32, "learning_rate": 3.1572273879966193e-05, "loss": 108.8435, "step": 5107, "task_loss": 2.1773300170898438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9881851913165891, "compression/movement_sparsity/importance_threshold": -8.403989398804348e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138478172858839, "compression/movement_sparsity/model_sparsity": 0.8824543334373979, "compression_loss": 104.72967529296875, "distillation_loss": 5.382200241088867, "epoch": 4.32, "learning_rate": 3.156757772142387e-05, "loss": 108.7601, "step": 5108, "task_loss": 2.2587757110595703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9882289498227227, "compression/movement_sparsity/importance_threshold": -8.372863543827918e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138481153900748, "compression/movement_sparsity/model_sparsity": 0.8824546213007928, "compression_loss": 104.73361206054688, "distillation_loss": 6.770040988922119, "epoch": 4.32, "learning_rate": 3.1562881562881566e-05, "loss": 109.4822, "step": 5109, "task_loss": 3.566114902496338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9882726001496626, "compression/movement_sparsity/importance_threshold": -8.341814637773896e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138442400355932, "compression/movement_sparsity/model_sparsity": 0.8824508790766595, "compression_loss": 104.73762512207031, "distillation_loss": 4.322707653045654, "epoch": 4.32, "learning_rate": 3.155818540433925e-05, "loss": 108.6124, "step": 5110, "task_loss": 3.2715888023376465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9883161424312941, "compression/movement_sparsity/importance_threshold": -8.310842585408391e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139113611752154, "compression/movement_sparsity/model_sparsity": 0.882515694398647, "compression_loss": 104.74160766601562, "distillation_loss": 4.734107971191406, "epoch": 4.32, "learning_rate": 3.155348924579694e-05, "loss": 108.7958, "step": 5111, "task_loss": 2.8220245838165283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9883595768015022, "compression/movement_sparsity/importance_threshold": -8.279947291497686e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139265764131188, "compression/movement_sparsity/model_sparsity": 0.8825303869463208, "compression_loss": 104.74559783935547, "distillation_loss": 5.729062557220459, "epoch": 4.32, "learning_rate": 3.1548793087254625e-05, "loss": 109.4598, "step": 5112, "task_loss": 3.013364791870117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9884029033941721, "compression/movement_sparsity/importance_threshold": -8.249128660807892e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139005459551697, "compression/movement_sparsity/model_sparsity": 0.8825052507146813, "compression_loss": 104.74958038330078, "distillation_loss": 3.0401573181152344, "epoch": 4.32, "learning_rate": 3.154409692871231e-05, "loss": 108.5822, "step": 5113, "task_loss": 1.2102855443954468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.988446122343189, "compression/movement_sparsity/importance_threshold": -8.218386598105292e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138740623788505, "compression/movement_sparsity/model_sparsity": 0.8824796769306816, "compression_loss": 104.75354766845703, "distillation_loss": 4.1310577392578125, "epoch": 4.32, "learning_rate": 3.1539400770170005e-05, "loss": 109.4427, "step": 5114, "task_loss": 2.086760997772217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9884892337824379, "compression/movement_sparsity/importance_threshold": -8.187721008155995e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138803941118651, "compression/movement_sparsity/model_sparsity": 0.8824857911491886, "compression_loss": 104.7575454711914, "distillation_loss": 5.012709617614746, "epoch": 4.32, "learning_rate": 3.153470461162769e-05, "loss": 109.4016, "step": 5115, "task_loss": 2.719228744506836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9885322378458041, "compression/movement_sparsity/importance_threshold": -8.157131795726111e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139053990913975, "compression/movement_sparsity/model_sparsity": 0.8825099371307497, "compression_loss": 104.7614974975586, "distillation_loss": 5.465641021728516, "epoch": 4.32, "learning_rate": 3.153000845308538e-05, "loss": 109.6032, "step": 5116, "task_loss": 3.213501453399658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9885751346671726, "compression/movement_sparsity/importance_threshold": -8.126618865581924e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91389464349219, "compression/movement_sparsity/model_sparsity": 0.882499551019463, "compression_loss": 104.76541900634766, "distillation_loss": 4.405257225036621, "epoch": 4.33, "learning_rate": 3.1525312294543064e-05, "loss": 108.8233, "step": 5117, "task_loss": 2.2014448642730713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9886179243804287, "compression/movement_sparsity/importance_threshold": -8.09618212248963e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139244658354473, "compression/movement_sparsity/model_sparsity": 0.8825283488734852, "compression_loss": 104.76930236816406, "distillation_loss": 3.9164886474609375, "epoch": 4.33, "learning_rate": 3.152061613600075e-05, "loss": 108.8392, "step": 5118, "task_loss": 1.7163604497909546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9886606071194574, "compression/movement_sparsity/importance_threshold": -8.065821471215425e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138974456715844, "compression/movement_sparsity/model_sparsity": 0.8825022569353748, "compression_loss": 104.77327728271484, "distillation_loss": 3.128160238265991, "epoch": 4.33, "learning_rate": 3.151591997745844e-05, "loss": 108.5599, "step": 5119, "task_loss": 1.8123856782913208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887031830181439, "compression/movement_sparsity/importance_threshold": -8.035536816525333e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139713874350943, "compression/movement_sparsity/model_sparsity": 0.8825736585718367, "compression_loss": 104.77716064453125, "distillation_loss": 3.2543768882751465, "epoch": 4.33, "learning_rate": 3.151122381891613e-05, "loss": 108.3568, "step": 5120, "task_loss": 2.146092176437378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887456522103732, "compression/movement_sparsity/importance_threshold": -8.00532806318581e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140081377197481, "compression/movement_sparsity/model_sparsity": 0.8826091463711555, "compression_loss": 104.78104400634766, "distillation_loss": 3.1753392219543457, "epoch": 4.33, "learning_rate": 3.1506527660373816e-05, "loss": 108.1296, "step": 5121, "task_loss": 1.7257936000823975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887880148300308, "compression/movement_sparsity/importance_threshold": -7.975195115962792e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140957445793688, "compression/movement_sparsity/model_sparsity": 0.8826937436656379, "compression_loss": 104.78494262695312, "distillation_loss": 4.910184860229492, "epoch": 4.33, "learning_rate": 3.15018315018315e-05, "loss": 109.4355, "step": 5122, "task_loss": 3.5960330963134766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9888302710110014, "compression/movement_sparsity/importance_threshold": -7.945137879622564e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9141408060088647, "compression/movement_sparsity/model_sparsity": 0.8827372570964055, "compression_loss": 104.78880310058594, "distillation_loss": 2.6587319374084473, "epoch": 4.33, "learning_rate": 3.149713534328919e-05, "loss": 108.215, "step": 5123, "task_loss": 2.092409133911133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9888724208871704, "compression/movement_sparsity/importance_threshold": -7.91515625893132e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9142441408455971, "compression/movement_sparsity/model_sparsity": 0.882837042063601, "compression_loss": 104.79266357421875, "distillation_loss": 3.8815860748291016, "epoch": 4.33, "learning_rate": 3.149243918474688e-05, "loss": 109.15, "step": 5124, "task_loss": 2.5863542556762695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9889144645924229, "compression/movement_sparsity/importance_threshold": -7.885250158655259e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9143750443579036, "compression/movement_sparsity/model_sparsity": 0.8829634486375535, "compression_loss": 104.7964859008789, "distillation_loss": 4.063858509063721, "epoch": 4.33, "learning_rate": 3.148774302620456e-05, "loss": 110.1601, "step": 5125, "task_loss": 1.9851672649383545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9889564022606441, "compression/movement_sparsity/importance_threshold": -7.855419483560576e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9143966628738274, "compression/movement_sparsity/model_sparsity": 0.882984324490949, "compression_loss": 104.80028533935547, "distillation_loss": 3.8833467960357666, "epoch": 4.33, "learning_rate": 3.1483046867662254e-05, "loss": 109.4963, "step": 5126, "task_loss": 2.683992385864258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.988998234025719, "compression/movement_sparsity/importance_threshold": -7.825664138413381e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9144701395947996, "compression/movement_sparsity/model_sparsity": 0.883055277060515, "compression_loss": 104.80404663085938, "distillation_loss": 4.161914348602295, "epoch": 4.33, "learning_rate": 3.147835070911994e-05, "loss": 110.0336, "step": 5127, "task_loss": 2.5169899463653564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9890399600215328, "compression/movement_sparsity/importance_threshold": -7.795984027979957e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9144675162779198, "compression/movement_sparsity/model_sparsity": 0.8830527438626401, "compression_loss": 104.80783081054688, "distillation_loss": 4.716585636138916, "epoch": 4.33, "learning_rate": 3.1473654550577634e-05, "loss": 109.064, "step": 5128, "task_loss": 3.0635228157043457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9890815803819707, "compression/movement_sparsity/importance_threshold": -7.766379057026328e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9144707715756843, "compression/movement_sparsity/model_sparsity": 0.8830558873309121, "compression_loss": 104.8116226196289, "distillation_loss": 4.94752311706543, "epoch": 4.34, "learning_rate": 3.146895839203532e-05, "loss": 108.797, "step": 5129, "task_loss": 2.8985984325408936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9891230952409178, "compression/movement_sparsity/importance_threshold": -7.736849130318864e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9145304043380312, "compression/movement_sparsity/model_sparsity": 0.8831134715244205, "compression_loss": 104.8154067993164, "distillation_loss": 4.027481555938721, "epoch": 4.34, "learning_rate": 3.1464262233493e-05, "loss": 109.3499, "step": 5130, "task_loss": 2.6593081951141357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9891645047322591, "compression/movement_sparsity/importance_threshold": -7.707394152623673e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9145438309507893, "compression/movement_sparsity/model_sparsity": 0.8831264368917252, "compression_loss": 104.81916809082031, "distillation_loss": 4.423262596130371, "epoch": 4.34, "learning_rate": 3.145956607495069e-05, "loss": 109.5345, "step": 5131, "task_loss": 2.2275307178497314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892058089898801, "compression/movement_sparsity/importance_threshold": -7.678014028706867e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.914541970780638, "compression/movement_sparsity/model_sparsity": 0.8831246406241413, "compression_loss": 104.82294464111328, "distillation_loss": 6.333189010620117, "epoch": 4.34, "learning_rate": 3.145486991640838e-05, "loss": 109.8132, "step": 5132, "task_loss": 3.195333480834961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892470081476655, "compression/movement_sparsity/importance_threshold": -7.648708663334815e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9145538949482739, "compression/movement_sparsity/model_sparsity": 0.8831361551599358, "compression_loss": 104.82671356201172, "distillation_loss": 4.062263488769531, "epoch": 4.34, "learning_rate": 3.145017375786607e-05, "loss": 108.6976, "step": 5133, "task_loss": 2.0108344554901123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892881023395008, "compression/movement_sparsity/importance_threshold": -7.619477961273453e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91460445341905, "compression/movement_sparsity/model_sparsity": 0.8831849767917047, "compression_loss": 104.8304672241211, "distillation_loss": 5.373843669891357, "epoch": 4.34, "learning_rate": 3.144547759932375e-05, "loss": 110.0969, "step": 5134, "task_loss": 2.2434237003326416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.989329091699271, "compression/movement_sparsity/importance_threshold": -7.590321827289238e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9145842181065719, "compression/movement_sparsity/model_sparsity": 0.8831654366244613, "compression_loss": 104.83419036865234, "distillation_loss": 3.8238143920898438, "epoch": 4.34, "learning_rate": 3.1440781440781445e-05, "loss": 108.2802, "step": 5135, "task_loss": 2.4345524311065674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9893699763608611, "compression/movement_sparsity/importance_threshold": -7.561240166148193e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9146974619266097, "compression/movement_sparsity/model_sparsity": 0.883274790170902, "compression_loss": 104.83784484863281, "distillation_loss": 5.005682945251465, "epoch": 4.34, "learning_rate": 3.143608528223913e-05, "loss": 109.3132, "step": 5136, "task_loss": 2.1152501106262207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894107564581565, "compression/movement_sparsity/importance_threshold": -7.532232882616514e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147529212302841, "compression/movement_sparsity/model_sparsity": 0.8833283442768824, "compression_loss": 104.841552734375, "distillation_loss": 4.678318023681641, "epoch": 4.34, "learning_rate": 3.143138912369682e-05, "loss": 108.7456, "step": 5137, "task_loss": 1.836376667022705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894514321250422, "compression/movement_sparsity/importance_threshold": -7.503299881460312e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9146752710506394, "compression/movement_sparsity/model_sparsity": 0.8832533616197884, "compression_loss": 104.84521484375, "distillation_loss": 4.166795253753662, "epoch": 4.34, "learning_rate": 3.1426692965154504e-05, "loss": 109.106, "step": 5138, "task_loss": 2.242715358734131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894920034954032, "compression/movement_sparsity/importance_threshold": -7.474441067446044e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9146892700234439, "compression/movement_sparsity/model_sparsity": 0.8832668796848112, "compression_loss": 104.84886169433594, "distillation_loss": 4.0333757400512695, "epoch": 4.34, "learning_rate": 3.142199680661219e-05, "loss": 108.7305, "step": 5139, "task_loss": 1.9663035869598389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.989532470703125, "compression/movement_sparsity/importance_threshold": -7.445656345339557e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147459932888876, "compression/movement_sparsity/model_sparsity": 0.8833216543315858, "compression_loss": 104.85250854492188, "distillation_loss": 4.236049652099609, "epoch": 4.34, "learning_rate": 3.141730064806988e-05, "loss": 108.9723, "step": 5140, "task_loss": 2.4922826290130615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9895728338820925, "compression/movement_sparsity/importance_threshold": -7.416945619907224e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147735977369646, "compression/movement_sparsity/model_sparsity": 0.8833483104819502, "compression_loss": 104.85619354248047, "distillation_loss": 4.089936256408691, "epoch": 4.35, "learning_rate": 3.141260448952757e-05, "loss": 108.5699, "step": 5141, "task_loss": 3.197683811187744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896130931661908, "compression/movement_sparsity/importance_threshold": -7.38830879591524e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148410050566103, "compression/movement_sparsity/model_sparsity": 0.8834134021527967, "compression_loss": 104.85978698730469, "distillation_loss": 4.495527267456055, "epoch": 4.35, "learning_rate": 3.1407908330985256e-05, "loss": 108.9472, "step": 5142, "task_loss": 1.9396209716796875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896532486893052, "compression/movement_sparsity/importance_threshold": -7.359745778129801e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149022952782586, "compression/movement_sparsity/model_sparsity": 0.8834725868667807, "compression_loss": 104.86341857910156, "distillation_loss": 4.8812737464904785, "epoch": 4.35, "learning_rate": 3.140321217244294e-05, "loss": 109.0444, "step": 5143, "task_loss": 3.536740779876709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896933005853207, "compression/movement_sparsity/importance_threshold": -7.331256471316931e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.914899135373835, "compression/movement_sparsity/model_sparsity": 0.8834695355147951, "compression_loss": 104.86700439453125, "distillation_loss": 3.905332326889038, "epoch": 4.35, "learning_rate": 3.139851601390063e-05, "loss": 109.1023, "step": 5144, "task_loss": 2.2481930255889893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9897332489881225, "compression/movement_sparsity/importance_threshold": -7.302840780243e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149271333194441, "compression/movement_sparsity/model_sparsity": 0.8834965716448407, "compression_loss": 104.87062072753906, "distillation_loss": 4.458486557006836, "epoch": 4.35, "learning_rate": 3.139381985535832e-05, "loss": 109.2131, "step": 5145, "task_loss": 3.1433868408203125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9897730940315957, "compression/movement_sparsity/importance_threshold": -7.274498609674118e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149271690919469, "compression/movement_sparsity/model_sparsity": 0.8834966061884482, "compression_loss": 104.87419128417969, "distillation_loss": 5.107104301452637, "epoch": 4.35, "learning_rate": 3.138912369681601e-05, "loss": 108.8591, "step": 5146, "task_loss": 2.444031000137329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898128358496256, "compression/movement_sparsity/importance_threshold": -7.246229864376393e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149729578956687, "compression/movement_sparsity/model_sparsity": 0.8835408220058991, "compression_loss": 104.87775421142578, "distillation_loss": 3.3299832344055176, "epoch": 4.35, "learning_rate": 3.1384427538273694e-05, "loss": 108.678, "step": 5147, "task_loss": 1.3845503330230713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898524745760973, "compression/movement_sparsity/importance_threshold": -7.21803444911611e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149898544412087, "compression/movement_sparsity/model_sparsity": 0.88355713810312, "compression_loss": 104.8813247680664, "distillation_loss": 2.0368192195892334, "epoch": 4.35, "learning_rate": 3.137973137973138e-05, "loss": 107.9326, "step": 5148, "task_loss": 1.0148530006408691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898920103448957, "compression/movement_sparsity/importance_threshold": -7.189912268659466e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9150658710098873, "compression/movement_sparsity/model_sparsity": 0.8836305432688102, "compression_loss": 104.88490295410156, "distillation_loss": 3.3433632850646973, "epoch": 4.35, "learning_rate": 3.137503522118907e-05, "loss": 108.9649, "step": 5149, "task_loss": 1.555788516998291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9899314432899061, "compression/movement_sparsity/importance_threshold": -7.161863227772656e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9150821117262073, "compression/movement_sparsity/model_sparsity": 0.8836462260665624, "compression_loss": 104.88843536376953, "distillation_loss": 4.196287631988525, "epoch": 4.35, "learning_rate": 3.137033906264676e-05, "loss": 109.119, "step": 5150, "task_loss": 2.2449588775634766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9899707735450137, "compression/movement_sparsity/importance_threshold": -7.133887231221703e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915119517840081, "compression/movement_sparsity/model_sparsity": 0.8836823471653499, "compression_loss": 104.89198303222656, "distillation_loss": 5.119906425476074, "epoch": 4.35, "learning_rate": 3.136564290410444e-05, "loss": 109.6073, "step": 5151, "task_loss": 2.972118854522705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900100012441035, "compression/movement_sparsity/importance_threshold": -7.105984183773065e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91517646766471, "compression/movement_sparsity/model_sparsity": 0.8837373405883046, "compression_loss": 104.8955078125, "distillation_loss": 3.6876652240753174, "epoch": 4.35, "learning_rate": 3.136094674556213e-05, "loss": 108.9284, "step": 5152, "task_loss": 2.1584439277648926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900491265210609, "compression/movement_sparsity/importance_threshold": -7.078153990192591e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9152068027471756, "compression/movement_sparsity/model_sparsity": 0.8837666335673658, "compression_loss": 104.8990478515625, "distillation_loss": 4.959772109985352, "epoch": 4.36, "learning_rate": 3.135625058701982e-05, "loss": 109.0207, "step": 5153, "task_loss": 3.0820066928863525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900881495097708, "compression/movement_sparsity/importance_threshold": -7.050396555246825e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915194771262031, "compression/movement_sparsity/model_sparsity": 0.8837550154007492, "compression_loss": 104.9025650024414, "distillation_loss": 3.5444464683532715, "epoch": 4.36, "learning_rate": 3.1351554428477506e-05, "loss": 108.4347, "step": 5154, "task_loss": 2.9988865852355957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9901270703441183, "compression/movement_sparsity/importance_threshold": -7.022711783701702e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9152047875628452, "compression/movement_sparsity/model_sparsity": 0.8837646876108166, "compression_loss": 104.90603637695312, "distillation_loss": 4.2707061767578125, "epoch": 4.36, "learning_rate": 3.134685826993519e-05, "loss": 109.1941, "step": 5155, "task_loss": 2.148707389831543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9901658891579888, "compression/movement_sparsity/importance_threshold": -6.995099580323419e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151836937102973, "compression/movement_sparsity/model_sparsity": 0.8837443183969961, "compression_loss": 104.90953826904297, "distillation_loss": 3.3347582817077637, "epoch": 4.36, "learning_rate": 3.134216211139288e-05, "loss": 108.9002, "step": 5156, "task_loss": 1.5671314001083374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902046060852672, "compression/movement_sparsity/importance_threshold": -6.96755984987826e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151905381825203, "compression/movement_sparsity/model_sparsity": 0.8837509277405421, "compression_loss": 104.91297149658203, "distillation_loss": 4.059309482574463, "epoch": 4.36, "learning_rate": 3.133746595285057e-05, "loss": 109.8407, "step": 5157, "task_loss": 2.2665224075317383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902432212598387, "compression/movement_sparsity/importance_threshold": -6.94009249713242e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151656643688318, "compression/movement_sparsity/model_sparsity": 0.8837269084188747, "compression_loss": 104.9163818359375, "distillation_loss": 4.396568298339844, "epoch": 4.36, "learning_rate": 3.133276979430826e-05, "loss": 109.1895, "step": 5158, "task_loss": 1.8057100772857666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902817348155885, "compression/movement_sparsity/importance_threshold": -6.912697426852098e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151993382182355, "compression/movement_sparsity/model_sparsity": 0.8837594254679585, "compression_loss": 104.91984558105469, "distillation_loss": 3.7757949829101562, "epoch": 4.36, "learning_rate": 3.132807363576595e-05, "loss": 108.8662, "step": 5159, "task_loss": 2.3198320865631104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9903201468864018, "compression/movement_sparsity/importance_threshold": -6.885374543803228e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9152637525718045, "compression/movement_sparsity/model_sparsity": 0.8838216269903206, "compression_loss": 104.92324829101562, "distillation_loss": 2.875896692276001, "epoch": 4.36, "learning_rate": 3.132337747722363e-05, "loss": 108.4414, "step": 5160, "task_loss": 1.3091908693313599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9903584576061635, "compression/movement_sparsity/importance_threshold": -6.858123752752354e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153265213902396, "compression/movement_sparsity/model_sparsity": 0.883882239506743, "compression_loss": 104.92668151855469, "distillation_loss": 4.829172134399414, "epoch": 4.36, "learning_rate": 3.131868131868132e-05, "loss": 109.5436, "step": 5161, "task_loss": 2.3122074604034424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990396667108759, "compression/movement_sparsity/importance_threshold": -6.830944958465413e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153439425991556, "compression/movement_sparsity/model_sparsity": 0.8838990622435389, "compression_loss": 104.93004608154297, "distillation_loss": 5.090088367462158, "epoch": 4.36, "learning_rate": 3.131398516013901e-05, "loss": 109.5398, "step": 5162, "task_loss": 2.114130735397339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9904347755280732, "compression/movement_sparsity/importance_threshold": -6.803838065708688e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153882528060905, "compression/movement_sparsity/model_sparsity": 0.8839418502585514, "compression_loss": 104.93341064453125, "distillation_loss": 7.0244293212890625, "epoch": 4.36, "learning_rate": 3.1309289001596696e-05, "loss": 109.7606, "step": 5163, "task_loss": 2.930917263031006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9904727829979916, "compression/movement_sparsity/importance_threshold": -6.776802979248289e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153851763708405, "compression/movement_sparsity/model_sparsity": 0.8839388795083164, "compression_loss": 104.936767578125, "distillation_loss": 4.651798248291016, "epoch": 4.36, "learning_rate": 3.130459284305438e-05, "loss": 109.7732, "step": 5164, "task_loss": 2.9868876934051514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990510689652399, "compression/movement_sparsity/importance_threshold": -6.749839603850498e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9154612764086926, "compression/movement_sparsity/model_sparsity": 0.8840123652757572, "compression_loss": 104.940185546875, "distillation_loss": 4.127422332763672, "epoch": 4.37, "learning_rate": 3.129989668451207e-05, "loss": 108.8628, "step": 5165, "task_loss": 1.9335741996765137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9905484956251805, "compression/movement_sparsity/importance_threshold": -6.722947844281513e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155055269947893, "compression/movement_sparsity/model_sparsity": 0.8840550957180907, "compression_loss": 104.94351959228516, "distillation_loss": 4.648988246917725, "epoch": 4.37, "learning_rate": 3.129520052596976e-05, "loss": 109.0512, "step": 5166, "task_loss": 3.326845407485962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9905862010502215, "compression/movement_sparsity/importance_threshold": -6.696127605307443e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155250587813768, "compression/movement_sparsity/model_sparsity": 0.8840739565277221, "compression_loss": 104.94679260253906, "distillation_loss": 4.691593170166016, "epoch": 4.37, "learning_rate": 3.129050436742745e-05, "loss": 109.2482, "step": 5167, "task_loss": 3.3667969703674316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906238060614072, "compression/movement_sparsity/importance_threshold": -6.669378791694398e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155020451378396, "compression/movement_sparsity/model_sparsity": 0.8840517334736387, "compression_loss": 104.95011901855469, "distillation_loss": 4.350789546966553, "epoch": 4.37, "learning_rate": 3.128580820888513e-05, "loss": 109.1324, "step": 5168, "task_loss": 2.1377112865448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906613107926224, "compression/movement_sparsity/importance_threshold": -6.642701308208748e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155267162406782, "compression/movement_sparsity/model_sparsity": 0.8840755570481976, "compression_loss": 104.95343780517578, "distillation_loss": 4.2193756103515625, "epoch": 4.37, "learning_rate": 3.128111205034282e-05, "loss": 109.6667, "step": 5169, "task_loss": 2.598621368408203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906987153777524, "compression/movement_sparsity/importance_threshold": -6.616095059616602e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155677711498484, "compression/movement_sparsity/model_sparsity": 0.8841152015949383, "compression_loss": 104.95672607421875, "distillation_loss": 4.958504676818848, "epoch": 4.37, "learning_rate": 3.127641589180051e-05, "loss": 109.6848, "step": 5170, "task_loss": 2.507119655609131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9907360199506825, "compression/movement_sparsity/importance_threshold": -6.589559950684071e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155311401068711, "compression/movement_sparsity/model_sparsity": 0.8840798289409774, "compression_loss": 104.96006774902344, "distillation_loss": 5.499320030212402, "epoch": 4.37, "learning_rate": 3.12717197332582e-05, "loss": 108.9989, "step": 5171, "task_loss": 3.08725905418396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9907732246452976, "compression/movement_sparsity/importance_threshold": -6.563095886177438e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155449840654963, "compression/movement_sparsity/model_sparsity": 0.8840931973170348, "compression_loss": 104.96337127685547, "distillation_loss": 4.934704303741455, "epoch": 4.37, "learning_rate": 3.126702357471588e-05, "loss": 109.0716, "step": 5172, "task_loss": 2.3479726314544678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990810329595483, "compression/movement_sparsity/importance_threshold": -6.5367027708629e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915607669414758, "compression/movement_sparsity/model_sparsity": 0.8841537292317068, "compression_loss": 104.96669006347656, "distillation_loss": 4.346250534057617, "epoch": 4.37, "learning_rate": 3.126232741617357e-05, "loss": 108.8508, "step": 5173, "task_loss": 2.2621660232543945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9908473349351238, "compression/movement_sparsity/importance_threshold": -6.510380509506478e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915623027742673, "compression/movement_sparsity/model_sparsity": 0.8841685599538102, "compression_loss": 104.96996307373047, "distillation_loss": 4.97256326675415, "epoch": 4.37, "learning_rate": 3.125763125763126e-05, "loss": 109.2976, "step": 5174, "task_loss": 3.190720319747925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990884240798105, "compression/movement_sparsity/importance_threshold": -6.484129006874631e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155802676775309, "compression/movement_sparsity/model_sparsity": 0.8841272688284509, "compression_loss": 104.97327423095703, "distillation_loss": 2.65899658203125, "epoch": 4.37, "learning_rate": 3.1252935099088946e-05, "loss": 108.6843, "step": 5175, "task_loss": 2.1174120903015137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909210473183119, "compression/movement_sparsity/importance_threshold": -6.457948167733294e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915639530790681, "compression/movement_sparsity/model_sparsity": 0.8841844960713497, "compression_loss": 104.97647094726562, "distillation_loss": 4.610378742218018, "epoch": 4.38, "learning_rate": 3.124823894054664e-05, "loss": 109.5826, "step": 5176, "task_loss": 3.309217929840088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909577546296297, "compression/movement_sparsity/importance_threshold": -6.431837896848751e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9156763526203406, "compression/movement_sparsity/model_sparsity": 0.8842200529578833, "compression_loss": 104.9797592163086, "distillation_loss": 4.782753944396973, "epoch": 4.38, "learning_rate": 3.124354278200432e-05, "loss": 109.7091, "step": 5177, "task_loss": 2.7787420749664307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909943628659433, "compression/movement_sparsity/importance_threshold": -6.405798098987198e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9157615627222665, "compression/movement_sparsity/model_sparsity": 0.8843023358306711, "compression_loss": 104.98298645019531, "distillation_loss": 3.318711042404175, "epoch": 4.38, "learning_rate": 3.123884662346201e-05, "loss": 109.3077, "step": 5178, "task_loss": 2.2689425945281982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991030872161138, "compression/movement_sparsity/importance_threshold": -6.379828678914831e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9158358025899673, "compression/movement_sparsity/model_sparsity": 0.8843740253305279, "compression_loss": 104.98619079589844, "distillation_loss": 3.7291741371154785, "epoch": 4.38, "learning_rate": 3.12341504649197e-05, "loss": 109.4645, "step": 5179, "task_loss": 2.906224250793457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991067282649099, "compression/movement_sparsity/importance_threshold": -6.353929541397761e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9159092196901014, "compression/movement_sparsity/model_sparsity": 0.884444920327415, "compression_loss": 104.98941802978516, "distillation_loss": 3.8908305168151855, "epoch": 4.38, "learning_rate": 3.1229454306377384e-05, "loss": 108.6121, "step": 5180, "task_loss": 2.1954524517059326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911035944637113, "compression/movement_sparsity/importance_threshold": -6.328100591202271e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915877370238346, "compression/movement_sparsity/model_sparsity": 0.8844141650023077, "compression_loss": 104.9925537109375, "distillation_loss": 4.940258979797363, "epoch": 4.38, "learning_rate": 3.122475814783507e-05, "loss": 108.9925, "step": 5181, "task_loss": 2.2128243446350098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911398077388601, "compression/movement_sparsity/importance_threshold": -6.30234173309447e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9158957334565052, "compression/movement_sparsity/model_sparsity": 0.8844318973874313, "compression_loss": 104.9957504272461, "distillation_loss": 2.5341339111328125, "epoch": 4.38, "learning_rate": 3.122006198929276e-05, "loss": 109.2872, "step": 5182, "task_loss": 1.79051673412323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911759226084307, "compression/movement_sparsity/importance_threshold": -6.276652871840555e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9159345585463275, "compression/movement_sparsity/model_sparsity": 0.8844693887159784, "compression_loss": 104.99897003173828, "distillation_loss": 4.270163536071777, "epoch": 4.38, "learning_rate": 3.121536583075045e-05, "loss": 109.2827, "step": 5183, "task_loss": 3.06170392036438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912119392063079, "compression/movement_sparsity/importance_threshold": -6.251033912206723e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9159510258218327, "compression/movement_sparsity/model_sparsity": 0.8844852902899105, "compression_loss": 105.00209045410156, "distillation_loss": 4.529436111450195, "epoch": 4.38, "learning_rate": 3.1210669672208136e-05, "loss": 109.2824, "step": 5184, "task_loss": 3.2331409454345703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912478576663771, "compression/movement_sparsity/importance_threshold": -6.22548475895917e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160368679046433, "compression/movement_sparsity/model_sparsity": 0.8845681834330955, "compression_loss": 105.0052719116211, "distillation_loss": 4.1393961906433105, "epoch": 4.38, "learning_rate": 3.120597351366582e-05, "loss": 109.3643, "step": 5185, "task_loss": 2.2882823944091797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912836781225234, "compression/movement_sparsity/importance_threshold": -6.200005316864092e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160383465014301, "compression/movement_sparsity/model_sparsity": 0.884569611235534, "compression_loss": 105.00846862792969, "distillation_loss": 2.5587730407714844, "epoch": 4.38, "learning_rate": 3.120127735512351e-05, "loss": 108.5782, "step": 5186, "task_loss": 2.261737108230591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9913194007086319, "compression/movement_sparsity/importance_threshold": -6.1745954906876e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160879510387954, "compression/movement_sparsity/model_sparsity": 0.8846175117044393, "compression_loss": 105.01164245605469, "distillation_loss": 5.130493640899658, "epoch": 4.38, "learning_rate": 3.1196581196581195e-05, "loss": 109.4733, "step": 5187, "task_loss": 2.080986261367798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9913550255585877, "compression/movement_sparsity/importance_threshold": -6.149255185196063e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160953559468972, "compression/movement_sparsity/model_sparsity": 0.8846246622311676, "compression_loss": 105.01484680175781, "distillation_loss": 2.765418767929077, "epoch": 4.39, "learning_rate": 3.119188503803889e-05, "loss": 108.3293, "step": 5188, "task_loss": 1.7980271577835083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991390552806276, "compression/movement_sparsity/importance_threshold": -6.123984305155417e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160256353387303, "compression/movement_sparsity/model_sparsity": 0.884557336740377, "compression_loss": 105.01792907714844, "distillation_loss": 2.6159815788269043, "epoch": 4.39, "learning_rate": 3.118718887949657e-05, "loss": 109.0525, "step": 5189, "task_loss": 2.4786465167999268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914259825855819, "compression/movement_sparsity/importance_threshold": -6.0987827553320335e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916083968366805, "compression/movement_sparsity/model_sparsity": 0.8846136658494839, "compression_loss": 105.02104187011719, "distillation_loss": 5.492721080780029, "epoch": 4.39, "learning_rate": 3.118249272095426e-05, "loss": 110.1079, "step": 5190, "task_loss": 2.632953405380249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914613150303906, "compression/movement_sparsity/importance_threshold": -6.073650440492021e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160947835868507, "compression/movement_sparsity/model_sparsity": 0.8846241095334495, "compression_loss": 105.0241470336914, "distillation_loss": 3.97795033454895, "epoch": 4.39, "learning_rate": 3.117779656241195e-05, "loss": 109.0018, "step": 5191, "task_loss": 1.903247594833374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914965502745872, "compression/movement_sparsity/importance_threshold": -6.048587265401577e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160974307520658, "compression/movement_sparsity/model_sparsity": 0.884626665760396, "compression_loss": 105.0272445678711, "distillation_loss": 5.687835693359375, "epoch": 4.39, "learning_rate": 3.1173100403869634e-05, "loss": 109.1331, "step": 5192, "task_loss": 3.545607566833496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9915316884520569, "compression/movement_sparsity/importance_threshold": -6.02359313482681e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161562288226783, "compression/movement_sparsity/model_sparsity": 0.8846834439363989, "compression_loss": 105.03034973144531, "distillation_loss": 3.9870285987854004, "epoch": 4.39, "learning_rate": 3.116840424532733e-05, "loss": 108.7005, "step": 5193, "task_loss": 2.165593147277832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9915667296966847, "compression/movement_sparsity/importance_threshold": -5.998667953534091e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161730538232125, "compression/movement_sparsity/model_sparsity": 0.8846996909464049, "compression_loss": 105.03336334228516, "distillation_loss": 5.271295547485352, "epoch": 4.39, "learning_rate": 3.116370808678501e-05, "loss": 109.6429, "step": 5194, "task_loss": 3.1540520191192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916016741423559, "compression/movement_sparsity/importance_threshold": -5.973811626289443e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916275565892378, "compression/movement_sparsity/model_sparsity": 0.8847986814106306, "compression_loss": 105.03641510009766, "distillation_loss": 4.465782642364502, "epoch": 4.39, "learning_rate": 3.11590119282427e-05, "loss": 109.0615, "step": 5195, "task_loss": 1.8264145851135254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916365219229556, "compression/movement_sparsity/importance_threshold": -5.949024057859062e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916246649785861, "compression/movement_sparsity/model_sparsity": 0.8847707586613288, "compression_loss": 105.03948974609375, "distillation_loss": 4.613128185272217, "epoch": 4.39, "learning_rate": 3.1154315769700386e-05, "loss": 108.9055, "step": 5196, "task_loss": 2.5166969299316406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916712731723689, "compression/movement_sparsity/importance_threshold": -5.924305153009145e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9162713328128672, "compression/movement_sparsity/model_sparsity": 0.8847945937504235, "compression_loss": 105.04248809814453, "distillation_loss": 7.23764181137085, "epoch": 4.39, "learning_rate": 3.114961961115808e-05, "loss": 109.538, "step": 5197, "task_loss": 3.3097381591796875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917059280244809, "compression/movement_sparsity/importance_threshold": -5.8996548165060614e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9163844573912286, "compression/movement_sparsity/model_sparsity": 0.8849038321515064, "compression_loss": 105.04548645019531, "distillation_loss": 2.4327824115753174, "epoch": 4.39, "learning_rate": 3.114492345261576e-05, "loss": 109.0643, "step": 5198, "task_loss": 1.6031664609909058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917404866131768, "compression/movement_sparsity/importance_threshold": -5.875072953115748e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164070059922281, "compression/movement_sparsity/model_sparsity": 0.8849256061386938, "compression_loss": 105.04847717285156, "distillation_loss": 4.701687335968018, "epoch": 4.39, "learning_rate": 3.114022729407345e-05, "loss": 109.1962, "step": 5199, "task_loss": 3.4232959747314453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917749490723418, "compression/movement_sparsity/importance_threshold": -5.850559467604488e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164896762464475, "compression/movement_sparsity/model_sparsity": 0.8850054364153573, "compression_loss": 105.05146789550781, "distillation_loss": 3.5096969604492188, "epoch": 4.4, "learning_rate": 3.113553113553114e-05, "loss": 108.9451, "step": 5200, "task_loss": 1.2224457263946533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918093155358609, "compression/movement_sparsity/importance_threshold": -5.826114264738478e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164928838475416, "compression/movement_sparsity/model_sparsity": 0.885008533825486, "compression_loss": 105.05436706542969, "distillation_loss": 5.689059257507324, "epoch": 4.4, "learning_rate": 3.1130834976988825e-05, "loss": 109.6238, "step": 5201, "task_loss": 3.1198744773864746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918435861376194, "compression/movement_sparsity/importance_threshold": -5.801737249283914e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916511795577412, "compression/movement_sparsity/model_sparsity": 0.8850267958792563, "compression_loss": 105.05728149414062, "distillation_loss": 4.682552337646484, "epoch": 4.4, "learning_rate": 3.112613881844651e-05, "loss": 109.9737, "step": 5202, "task_loss": 1.9132312536239624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918777610115023, "compression/movement_sparsity/importance_threshold": -5.7774283260069066e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165003483764816, "compression/movement_sparsity/model_sparsity": 0.8850157419248935, "compression_loss": 105.0602035522461, "distillation_loss": 4.006004810333252, "epoch": 4.4, "learning_rate": 3.11214426599042e-05, "loss": 109.007, "step": 5203, "task_loss": 2.0425469875335693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9919118402913948, "compression/movement_sparsity/importance_threshold": -5.753187399673652e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165223484657699, "compression/movement_sparsity/model_sparsity": 0.8850369862434344, "compression_loss": 105.06306457519531, "distillation_loss": 3.8668670654296875, "epoch": 4.4, "learning_rate": 3.111674650136189e-05, "loss": 108.7254, "step": 5204, "task_loss": 2.6839709281921387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991945824111182, "compression/movement_sparsity/importance_threshold": -5.729014375050433e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165012069165515, "compression/movement_sparsity/model_sparsity": 0.8850165709714707, "compression_loss": 105.06598663330078, "distillation_loss": 4.609126567840576, "epoch": 4.4, "learning_rate": 3.111205034281958e-05, "loss": 109.4372, "step": 5205, "task_loss": 3.46915602684021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9919797126047492, "compression/movement_sparsity/importance_threshold": -5.7049091569033594e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164812577840966, "compression/movement_sparsity/model_sparsity": 0.8849973071530864, "compression_loss": 105.06884765625, "distillation_loss": 4.0761590003967285, "epoch": 4.4, "learning_rate": 3.110735418427726e-05, "loss": 109.5648, "step": 5206, "task_loss": 2.039581537246704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920135059059813, "compression/movement_sparsity/importance_threshold": -5.6808716499986284e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164992513530591, "compression/movement_sparsity/model_sparsity": 0.8850146825876004, "compression_loss": 105.0716781616211, "distillation_loss": 4.46539306640625, "epoch": 4.4, "learning_rate": 3.110265802573495e-05, "loss": 109.5954, "step": 5207, "task_loss": 1.8083127737045288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920472041487636, "compression/movement_sparsity/importance_threshold": -5.656901759102436e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165085999004857, "compression/movement_sparsity/model_sparsity": 0.8850237099836633, "compression_loss": 105.07453155517578, "distillation_loss": 5.630430698394775, "epoch": 4.4, "learning_rate": 3.1097961867192636e-05, "loss": 109.7134, "step": 5208, "task_loss": 3.2901601791381836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920808074669812, "compression/movement_sparsity/importance_threshold": -5.632999388980892e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165798468021099, "compression/movement_sparsity/model_sparsity": 0.8850925093350357, "compression_loss": 105.07745361328125, "distillation_loss": 4.295858383178711, "epoch": 4.4, "learning_rate": 3.109326570865033e-05, "loss": 109.3781, "step": 5209, "task_loss": 2.122316837310791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921143159945193, "compression/movement_sparsity/importance_threshold": -5.609164444400367e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165690554303995, "compression/movement_sparsity/model_sparsity": 0.8850820886801416, "compression_loss": 105.08033752441406, "distillation_loss": 3.7604408264160156, "epoch": 4.4, "learning_rate": 3.1088569550108015e-05, "loss": 109.4856, "step": 5210, "task_loss": 2.742830276489258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921477298652629, "compression/movement_sparsity/importance_threshold": -5.585396830126883e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165747551825294, "compression/movement_sparsity/model_sparsity": 0.8850875926282514, "compression_loss": 105.08316802978516, "distillation_loss": 5.595411777496338, "epoch": 4.4, "learning_rate": 3.10838733915657e-05, "loss": 109.6669, "step": 5211, "task_loss": 2.4586095809936523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921810492130972, "compression/movement_sparsity/importance_threshold": -5.561696450926638e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165671475635777, "compression/movement_sparsity/model_sparsity": 0.8850802463544145, "compression_loss": 105.08604431152344, "distillation_loss": 3.6663436889648438, "epoch": 4.41, "learning_rate": 3.107917723302339e-05, "loss": 109.2456, "step": 5212, "task_loss": 2.2676379680633545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922142741719074, "compression/movement_sparsity/importance_threshold": -5.5380632115659134e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165615909014594, "compression/movement_sparsity/model_sparsity": 0.8850748805807342, "compression_loss": 105.08892822265625, "distillation_loss": 5.774740219116211, "epoch": 4.41, "learning_rate": 3.1074481074481074e-05, "loss": 109.3286, "step": 5213, "task_loss": 2.0526621341705322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922474048755787, "compression/movement_sparsity/importance_threshold": -5.5144970168107335e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9166069862076491, "compression/movement_sparsity/model_sparsity": 0.885118716418504, "compression_loss": 105.09178924560547, "distillation_loss": 5.082400321960449, "epoch": 4.41, "learning_rate": 3.106978491593877e-05, "loss": 109.0191, "step": 5214, "task_loss": 3.1908485889434814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922804414579961, "compression/movement_sparsity/importance_threshold": -5.490997771427381e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9166019661330744, "compression/movement_sparsity/model_sparsity": 0.8851138687989345, "compression_loss": 105.09465026855469, "distillation_loss": 3.833782196044922, "epoch": 4.41, "learning_rate": 3.106508875739645e-05, "loss": 109.3724, "step": 5215, "task_loss": 2.585695266723633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923133840530447, "compression/movement_sparsity/importance_threshold": -5.46756538018214e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9166676086759099, "compression/movement_sparsity/model_sparsity": 0.8851772563184835, "compression_loss": 105.09754180908203, "distillation_loss": 4.956536769866943, "epoch": 4.41, "learning_rate": 3.106039259885414e-05, "loss": 109.8596, "step": 5216, "task_loss": 2.0071842670440674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923462327946098, "compression/movement_sparsity/importance_threshold": -5.4441997478410324e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167190256867558, "compression/movement_sparsity/model_sparsity": 0.8852269069968296, "compression_loss": 105.10042572021484, "distillation_loss": 5.712106704711914, "epoch": 4.41, "learning_rate": 3.1055696440311826e-05, "loss": 109.3736, "step": 5217, "task_loss": 4.013970375061035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923789878165764, "compression/movement_sparsity/importance_threshold": -5.420900779170342e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9166917193428696, "compression/movement_sparsity/model_sparsity": 0.88520053870986, "compression_loss": 105.10320281982422, "distillation_loss": 4.499550819396973, "epoch": 4.41, "learning_rate": 3.105100028176951e-05, "loss": 109.5686, "step": 5218, "task_loss": 2.916879892349243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9924116492528298, "compression/movement_sparsity/importance_threshold": -5.397668378936179e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167008055586081, "compression/movement_sparsity/model_sparsity": 0.8852093127861355, "compression_loss": 105.10607147216797, "distillation_loss": 3.1426234245300293, "epoch": 4.41, "learning_rate": 3.10463041232272e-05, "loss": 108.7278, "step": 5219, "task_loss": 2.4215354919433594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.992444217237255, "compression/movement_sparsity/importance_threshold": -5.374502451904826e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167326550103635, "compression/movement_sparsity/model_sparsity": 0.8852400681112427, "compression_loss": 105.10885620117188, "distillation_loss": 4.717231750488281, "epoch": 4.41, "learning_rate": 3.1041607964684885e-05, "loss": 108.9009, "step": 5220, "task_loss": 1.8179587125778198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9924766919037372, "compression/movement_sparsity/importance_threshold": -5.351402902842306e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167713727826772, "compression/movement_sparsity/model_sparsity": 0.8852774558089676, "compression_loss": 105.11164855957031, "distillation_loss": 5.542663097381592, "epoch": 4.41, "learning_rate": 3.103691180614258e-05, "loss": 109.3894, "step": 5221, "task_loss": 3.8033828735351562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925090733861615, "compression/movement_sparsity/importance_threshold": -5.3283696365149895e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167439948937852, "compression/movement_sparsity/model_sparsity": 0.8852510184347834, "compression_loss": 105.11437225341797, "distillation_loss": 4.774196624755859, "epoch": 4.41, "learning_rate": 3.1032215647600265e-05, "loss": 109.1633, "step": 5222, "task_loss": 2.720365285873413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925413618184131, "compression/movement_sparsity/importance_threshold": -5.305402557688899e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167458550639365, "compression/movement_sparsity/model_sparsity": 0.8852528147023673, "compression_loss": 105.11712646484375, "distillation_loss": 3.7056400775909424, "epoch": 4.41, "learning_rate": 3.102751948905796e-05, "loss": 109.5024, "step": 5223, "task_loss": 1.605666995048523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925735573343771, "compression/movement_sparsity/importance_threshold": -5.282501571130405e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167107383902489, "compression/movement_sparsity/model_sparsity": 0.8852189043944524, "compression_loss": 105.11994171142578, "distillation_loss": 3.7616846561431885, "epoch": 4.42, "learning_rate": 3.102282333051564e-05, "loss": 108.7436, "step": 5224, "task_loss": 2.021286964416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9926056600679386, "compression/movement_sparsity/importance_threshold": -5.259666581605531e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167335612471038, "compression/movement_sparsity/model_sparsity": 0.8852409432159631, "compression_loss": 105.12261199951172, "distillation_loss": 5.108575344085693, "epoch": 4.42, "learning_rate": 3.1018127171973324e-05, "loss": 109.5385, "step": 5225, "task_loss": 2.6010303497314453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9926376701529829, "compression/movement_sparsity/importance_threshold": -5.236897493880559e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167475005990702, "compression/movement_sparsity/model_sparsity": 0.8852544037083069, "compression_loss": 105.12535858154297, "distillation_loss": 4.161294937133789, "epoch": 4.42, "learning_rate": 3.101343101343102e-05, "loss": 109.2999, "step": 5226, "task_loss": 2.4396986961364746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.992669587723395, "compression/movement_sparsity/importance_threshold": -5.214194212721599e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916750159688453, "compression/movement_sparsity/model_sparsity": 0.8852569714497891, "compression_loss": 105.12808227539062, "distillation_loss": 5.213353157043457, "epoch": 4.42, "learning_rate": 3.10087348548887e-05, "loss": 108.7394, "step": 5227, "task_loss": 2.8157567977905273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99270141291306, "compression/movement_sparsity/importance_threshold": -5.1915566428948484e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168224439926616, "compression/movement_sparsity/model_sparsity": 0.8853267725657756, "compression_loss": 105.13076782226562, "distillation_loss": 2.5332326889038086, "epoch": 4.42, "learning_rate": 3.100403869634639e-05, "loss": 108.9511, "step": 5228, "task_loss": 1.1931376457214355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927331458558631, "compression/movement_sparsity/importance_threshold": -5.16898468916659e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169019901149604, "compression/movement_sparsity/model_sparsity": 0.885403586034061, "compression_loss": 105.13346862792969, "distillation_loss": 4.357779502868652, "epoch": 4.42, "learning_rate": 3.0999342537804076e-05, "loss": 108.955, "step": 5229, "task_loss": 3.2297377586364746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927647866856896, "compression/movement_sparsity/importance_threshold": -5.146478256302933e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169032660008974, "compression/movement_sparsity/model_sparsity": 0.885404818089391, "compression_loss": 105.13615417480469, "distillation_loss": 4.976962566375732, "epoch": 4.42, "learning_rate": 3.099464637926177e-05, "loss": 109.2559, "step": 5230, "task_loss": 1.9525827169418335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927963355364243, "compression/movement_sparsity/importance_threshold": -5.124037249069988e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916960621247226, "compression/movement_sparsity/model_sparsity": 0.8854602030065627, "compression_loss": 105.13882446289062, "distillation_loss": 4.546326637268066, "epoch": 4.42, "learning_rate": 3.0989950220719455e-05, "loss": 109.1379, "step": 5231, "task_loss": 2.418938398361206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928277925419527, "compression/movement_sparsity/importance_threshold": -5.101661572234038e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169577236744905, "compression/movement_sparsity/model_sparsity": 0.8854574049743646, "compression_loss": 105.14151000976562, "distillation_loss": 4.60490608215332, "epoch": 4.42, "learning_rate": 3.0985254062177135e-05, "loss": 108.8989, "step": 5232, "task_loss": 2.507697105407715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928591578361597, "compression/movement_sparsity/importance_threshold": -5.07935113056128e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91698445765833, "compression/movement_sparsity/model_sparsity": 0.8854832205636161, "compression_loss": 105.1442642211914, "distillation_loss": 4.686638832092285, "epoch": 4.42, "learning_rate": 3.098055790363483e-05, "loss": 109.0559, "step": 5233, "task_loss": 3.18576717376709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928904315529306, "compression/movement_sparsity/importance_threshold": -5.0571058288179094e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169707090930459, "compression/movement_sparsity/model_sparsity": 0.885469944303845, "compression_loss": 105.1468505859375, "distillation_loss": 3.6179027557373047, "epoch": 4.42, "learning_rate": 3.0975861745092514e-05, "loss": 109.0246, "step": 5234, "task_loss": 2.317737102508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929216138261504, "compression/movement_sparsity/importance_threshold": -5.03492557176995e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91693946777384, "compression/movement_sparsity/model_sparsity": 0.8854397762200632, "compression_loss": 105.14956665039062, "distillation_loss": 3.788475513458252, "epoch": 4.42, "learning_rate": 3.097116558655021e-05, "loss": 109.3121, "step": 5235, "task_loss": 1.6217838525772095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929527047897043, "compression/movement_sparsity/importance_threshold": -5.0128102641837716e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169944620349766, "compression/movement_sparsity/model_sparsity": 0.8854928812591476, "compression_loss": 105.15218353271484, "distillation_loss": 5.0395002365112305, "epoch": 4.43, "learning_rate": 3.0966469428007894e-05, "loss": 109.6252, "step": 5236, "task_loss": 3.210350751876831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929837045774774, "compression/movement_sparsity/importance_threshold": -4.990759810825484e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169838495257806, "compression/movement_sparsity/model_sparsity": 0.8854826333222905, "compression_loss": 105.15486907958984, "distillation_loss": 5.1932759284973145, "epoch": 4.43, "learning_rate": 3.096177326946558e-05, "loss": 109.7802, "step": 5237, "task_loss": 2.0835227966308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930146133233549, "compression/movement_sparsity/importance_threshold": -4.968774116461284e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169154048035507, "compression/movement_sparsity/model_sparsity": 0.8854165398868299, "compression_loss": 105.15750122070312, "distillation_loss": 3.9941186904907227, "epoch": 4.43, "learning_rate": 3.0957077110923267e-05, "loss": 108.9448, "step": 5238, "task_loss": 2.6336517333984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930454311612219, "compression/movement_sparsity/importance_threshold": -4.9468530858573674e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169264465827816, "compression/movement_sparsity/model_sparsity": 0.8854272023469756, "compression_loss": 105.16012573242188, "distillation_loss": 4.970432758331299, "epoch": 4.43, "learning_rate": 3.095238095238095e-05, "loss": 109.3262, "step": 5239, "task_loss": 3.816368818283081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930761582249636, "compression/movement_sparsity/importance_threshold": -4.9249966237799314e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170088783536483, "compression/movement_sparsity/model_sparsity": 0.8855068023329232, "compression_loss": 105.1627197265625, "distillation_loss": 5.160538196563721, "epoch": 4.43, "learning_rate": 3.0947684793838646e-05, "loss": 109.034, "step": 5240, "task_loss": 1.9850313663482666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931067946484651, "compression/movement_sparsity/importance_threshold": -4.9032046349950856e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169874029277361, "compression/movement_sparsity/model_sparsity": 0.8854860646539573, "compression_loss": 105.16529846191406, "distillation_loss": 2.9938530921936035, "epoch": 4.43, "learning_rate": 3.0942988635296326e-05, "loss": 109.226, "step": 5241, "task_loss": 1.7234081029891968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931373405656115, "compression/movement_sparsity/importance_threshold": -4.881477024269113e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169613009247812, "compression/movement_sparsity/model_sparsity": 0.885460859335103, "compression_loss": 105.16791534423828, "distillation_loss": 3.738168478012085, "epoch": 4.43, "learning_rate": 3.093829247675402e-05, "loss": 109.0255, "step": 5242, "task_loss": 2.964353322982788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.993167796110288, "compression/movement_sparsity/importance_threshold": -4.859813696368124e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917008759111972, "compression/movement_sparsity/model_sparsity": 0.8855066871875653, "compression_loss": 105.17044067382812, "distillation_loss": 4.4508233070373535, "epoch": 4.43, "learning_rate": 3.0933596318211705e-05, "loss": 109.6346, "step": 5243, "task_loss": 1.7019823789596558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931981614163798, "compression/movement_sparsity/importance_threshold": -4.8382145560582276e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170006387538119, "compression/movement_sparsity/model_sparsity": 0.8854988457886892, "compression_loss": 105.17303466796875, "distillation_loss": 3.5078518390655518, "epoch": 4.43, "learning_rate": 3.092890015966939e-05, "loss": 108.5074, "step": 5244, "task_loss": 2.116057872772217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932284366177719, "compression/movement_sparsity/importance_threshold": -4.816679508105881e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170040252174205, "compression/movement_sparsity/model_sparsity": 0.8855021159168549, "compression_loss": 105.17555236816406, "distillation_loss": 4.207507610321045, "epoch": 4.43, "learning_rate": 3.092420400112708e-05, "loss": 109.2795, "step": 5245, "task_loss": 2.7052714824676514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932586218483495, "compression/movement_sparsity/importance_threshold": -4.7952084572770205e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917004108686594, "compression/movement_sparsity/model_sparsity": 0.8855021965186054, "compression_loss": 105.17811584472656, "distillation_loss": 3.9657106399536133, "epoch": 4.43, "learning_rate": 3.0919507842584764e-05, "loss": 109.5192, "step": 5246, "task_loss": 2.051788806915283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932887172419977, "compression/movement_sparsity/importance_threshold": -4.773801308337929e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169794018112525, "compression/movement_sparsity/model_sparsity": 0.8854783384004391, "compression_loss": 105.18064880371094, "distillation_loss": 4.792137145996094, "epoch": 4.44, "learning_rate": 3.091481168404246e-05, "loss": 108.9807, "step": 5247, "task_loss": 2.8416590690612793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933187229326018, "compression/movement_sparsity/importance_threshold": -4.752457966054717e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170009726305057, "compression/movement_sparsity/model_sparsity": 0.8854991681956915, "compression_loss": 105.18315124511719, "distillation_loss": 5.150908470153809, "epoch": 4.44, "learning_rate": 3.0910115525500144e-05, "loss": 109.3308, "step": 5248, "task_loss": 3.4290452003479004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933486390540468, "compression/movement_sparsity/importance_threshold": -4.731178335193667e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169675730369576, "compression/movement_sparsity/model_sparsity": 0.885466915980931, "compression_loss": 105.18570709228516, "distillation_loss": 5.26080846786499, "epoch": 4.44, "learning_rate": 3.090541936695783e-05, "loss": 109.4759, "step": 5249, "task_loss": 2.5231101512908936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933784657402178, "compression/movement_sparsity/importance_threshold": -4.709962320520976e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169493290604748, "compression/movement_sparsity/model_sparsity": 0.8854492987411653, "compression_loss": 105.18821716308594, "distillation_loss": 3.9433374404907227, "epoch": 4.44, "learning_rate": 3.0900723208415516e-05, "loss": 109.6518, "step": 5250, "task_loss": 2.3092896938323975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.993408203125, "compression/movement_sparsity/importance_threshold": -4.6888098268027534e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170161163234033, "compression/movement_sparsity/model_sparsity": 0.8855137916561505, "compression_loss": 105.19074249267578, "distillation_loss": 4.5301666259765625, "epoch": 4.44, "learning_rate": 3.08960270498732e-05, "loss": 109.1485, "step": 5251, "task_loss": 2.925737142562866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934378513422786, "compression/movement_sparsity/importance_threshold": -4.667720758805196e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170206952037755, "compression/movement_sparsity/model_sparsity": 0.8855182132378956, "compression_loss": 105.19329071044922, "distillation_loss": 4.246933460235596, "epoch": 4.44, "learning_rate": 3.0891330891330896e-05, "loss": 109.5885, "step": 5252, "task_loss": 1.677748441696167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934674105259387, "compression/movement_sparsity/importance_threshold": -4.6466950212945e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917039606933646, "compression/movement_sparsity/model_sparsity": 0.8855364752916658, "compression_loss": 105.19579315185547, "distillation_loss": 5.695709228515625, "epoch": 4.44, "learning_rate": 3.088663473278858e-05, "loss": 109.4771, "step": 5253, "task_loss": 3.117403984069824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934968808098653, "compression/movement_sparsity/importance_threshold": -4.6257325190368626e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171365623406931, "compression/movement_sparsity/model_sparsity": 0.8856300999822112, "compression_loss": 105.19837188720703, "distillation_loss": 4.5618696212768555, "epoch": 4.44, "learning_rate": 3.088193857424627e-05, "loss": 109.0071, "step": 5254, "task_loss": 2.4165849685668945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935262623279438, "compression/movement_sparsity/importance_threshold": -4.604833156798393e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171119866311956, "compression/movement_sparsity/model_sparsity": 0.8856063685239386, "compression_loss": 105.20079040527344, "distillation_loss": 5.913418292999268, "epoch": 4.44, "learning_rate": 3.0877242415703955e-05, "loss": 110.2576, "step": 5255, "task_loss": 3.0465099811553955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935555552140591, "compression/movement_sparsity/importance_threshold": -4.583996839345461e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171142760713817, "compression/movement_sparsity/model_sparsity": 0.8856085793148112, "compression_loss": 105.2032241821289, "distillation_loss": 4.4056596755981445, "epoch": 4.44, "learning_rate": 3.087254625716164e-05, "loss": 109.9677, "step": 5256, "task_loss": 1.3482165336608887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935847596020966, "compression/movement_sparsity/importance_threshold": -4.56322347144409e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170904873569481, "compression/movement_sparsity/model_sparsity": 0.8855856078159011, "compression_loss": 105.20565795898438, "distillation_loss": 5.648919105529785, "epoch": 4.44, "learning_rate": 3.0867850098619334e-05, "loss": 110.1521, "step": 5257, "task_loss": 3.1760010719299316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936138756259413, "compression/movement_sparsity/importance_threshold": -4.54251295786039e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171078966416965, "compression/movement_sparsity/model_sparsity": 0.8856024190381612, "compression_loss": 105.20808410644531, "distillation_loss": 6.424398422241211, "epoch": 4.44, "learning_rate": 3.0863153940077014e-05, "loss": 110.2063, "step": 5258, "task_loss": 3.391432762145996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936429034194781, "compression/movement_sparsity/importance_threshold": -4.521865203360818e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171244712347103, "compression/movement_sparsity/model_sparsity": 0.8856184242429155, "compression_loss": 105.21060943603516, "distillation_loss": 5.314116477966309, "epoch": 4.45, "learning_rate": 3.085845778153471e-05, "loss": 109.0842, "step": 5259, "task_loss": 2.896977424621582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936718431165926, "compression/movement_sparsity/importance_threshold": -4.501280112711309e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917191747388512, "compression/movement_sparsity/model_sparsity": 0.8856833892538684, "compression_loss": 105.21299743652344, "distillation_loss": 3.571349859237671, "epoch": 4.45, "learning_rate": 3.085376162299239e-05, "loss": 109.481, "step": 5260, "task_loss": 1.741857886314392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937006948511696, "compression/movement_sparsity/importance_threshold": -4.480757590678147e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9172483991089498, "compression/movement_sparsity/model_sparsity": 0.8857380948134282, "compression_loss": 105.21549987792969, "distillation_loss": 5.248410224914551, "epoch": 4.45, "learning_rate": 3.0849065464450086e-05, "loss": 109.3837, "step": 5261, "task_loss": 3.0533013343811035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937294587570944, "compression/movement_sparsity/importance_threshold": -4.460297542027616e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173324287182798, "compression/movement_sparsity/model_sparsity": 0.8858192377471723, "compression_loss": 105.2178955078125, "distillation_loss": 3.1252119541168213, "epoch": 4.45, "learning_rate": 3.0844369305907766e-05, "loss": 108.7341, "step": 5262, "task_loss": 2.300398826599121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937581349682522, "compression/movement_sparsity/importance_threshold": -4.439899871525651e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173738890491497, "compression/movement_sparsity/model_sparsity": 0.8858592737881299, "compression_loss": 105.2203140258789, "distillation_loss": 4.622780799865723, "epoch": 4.45, "learning_rate": 3.083967314736545e-05, "loss": 109.6483, "step": 5263, "task_loss": 2.566293239593506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937867236185278, "compression/movement_sparsity/importance_threshold": -4.4195644839387094e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173794457112681, "compression/movement_sparsity/model_sparsity": 0.8858646395618102, "compression_loss": 105.22261047363281, "distillation_loss": 3.6376585960388184, "epoch": 4.45, "learning_rate": 3.0834976988823145e-05, "loss": 109.4034, "step": 5264, "task_loss": 2.6201443672180176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9938152248418067, "compression/movement_sparsity/importance_threshold": -4.399291284032814e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174420833638592, "compression/movement_sparsity/model_sparsity": 0.8859251254183389, "compression_loss": 105.22502136230469, "distillation_loss": 6.753238677978516, "epoch": 4.45, "learning_rate": 3.083028083028083e-05, "loss": 110.2312, "step": 5265, "task_loss": 3.5934088230133057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9938436387719739, "compression/movement_sparsity/importance_threshold": -4.379080176574162e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174351673466304, "compression/movement_sparsity/model_sparsity": 0.885918446987578, "compression_loss": 105.22736358642578, "distillation_loss": 4.449145793914795, "epoch": 4.45, "learning_rate": 3.082558467173852e-05, "loss": 109.447, "step": 5266, "task_loss": 2.5360465049743652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9938719655429147, "compression/movement_sparsity/importance_threshold": -4.358931066328949e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174537332756394, "compression/movement_sparsity/model_sparsity": 0.8859363751198102, "compression_loss": 105.22972869873047, "distillation_loss": 4.888471603393555, "epoch": 4.45, "learning_rate": 3.0820888513196204e-05, "loss": 109.9564, "step": 5267, "task_loss": 3.7788357734680176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939002052885139, "compression/movement_sparsity/importance_threshold": -4.338843858063459e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174607208378741, "compression/movement_sparsity/model_sparsity": 0.8859431226377857, "compression_loss": 105.23207092285156, "distillation_loss": 5.25325345993042, "epoch": 4.45, "learning_rate": 3.08161923546539e-05, "loss": 109.8221, "step": 5268, "task_loss": 3.0956203937530518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939283581426569, "compression/movement_sparsity/importance_threshold": -4.3188184565437146e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174244355957581, "compression/movement_sparsity/model_sparsity": 0.885908083905363, "compression_loss": 105.23436737060547, "distillation_loss": 6.924116134643555, "epoch": 4.45, "learning_rate": 3.0811496196111584e-05, "loss": 109.9809, "step": 5269, "task_loss": 3.4301059246063232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939564242392288, "compression/movement_sparsity/importance_threshold": -4.298854766535999e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174769377058588, "compression/movement_sparsity/model_sparsity": 0.8859587824064663, "compression_loss": 105.23674774169922, "distillation_loss": 4.10304069519043, "epoch": 4.45, "learning_rate": 3.080680003756927e-05, "loss": 109.0227, "step": 5270, "task_loss": 1.541210651397705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939844037121147, "compression/movement_sparsity/importance_threshold": -4.278952692806509e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175140218672063, "compression/movement_sparsity/model_sparsity": 0.8859945926127873, "compression_loss": 105.23906707763672, "distillation_loss": 3.524043321609497, "epoch": 4.46, "learning_rate": 3.0802103879026956e-05, "loss": 109.2368, "step": 5271, "task_loss": 2.7506484985351562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940122966951997, "compression/movement_sparsity/importance_threshold": -4.2591121401213544e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175304652943762, "compression/movement_sparsity/model_sparsity": 0.8860104711576481, "compression_loss": 105.24137878417969, "distillation_loss": 3.9079980850219727, "epoch": 4.46, "learning_rate": 3.079740772048464e-05, "loss": 109.468, "step": 5272, "task_loss": 2.62796950340271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940401033223691, "compression/movement_sparsity/importance_threshold": -4.239333013246818e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175212598369613, "compression/movement_sparsity/model_sparsity": 0.8860015819360146, "compression_loss": 105.24365997314453, "distillation_loss": 5.8484697341918945, "epoch": 4.46, "learning_rate": 3.0792711561942336e-05, "loss": 109.3488, "step": 5273, "task_loss": 4.1734619140625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940678237275079, "compression/movement_sparsity/importance_threshold": -4.21961521694901e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175612415710444, "compression/movement_sparsity/model_sparsity": 0.8860401901745337, "compression_loss": 105.24598693847656, "distillation_loss": 4.3936944007873535, "epoch": 4.46, "learning_rate": 3.078801540340002e-05, "loss": 109.1785, "step": 5274, "task_loss": 2.5606260299682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940954580445013, "compression/movement_sparsity/importance_threshold": -4.199958655994127e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9176347302161842, "compression/movement_sparsity/model_sparsity": 0.8861111542586355, "compression_loss": 105.24826049804688, "distillation_loss": 3.6819562911987305, "epoch": 4.46, "learning_rate": 3.078331924485771e-05, "loss": 109.0115, "step": 5275, "task_loss": 1.4864989519119263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941230064072344, "compression/movement_sparsity/importance_threshold": -4.180363235148452e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917719057929705, "compression/movement_sparsity/model_sparsity": 0.8861925850557745, "compression_loss": 105.25054931640625, "distillation_loss": 4.375253200531006, "epoch": 4.46, "learning_rate": 3.0778623086315395e-05, "loss": 109.7677, "step": 5276, "task_loss": 2.816133499145508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941504689495924, "compression/movement_sparsity/importance_threshold": -4.1608288591780083e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177350840110077, "compression/movement_sparsity/model_sparsity": 0.8862080605918824, "compression_loss": 105.2528076171875, "distillation_loss": 3.9666500091552734, "epoch": 4.46, "learning_rate": 3.077392692777308e-05, "loss": 108.7898, "step": 5277, "task_loss": 2.340773820877075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941778458054604, "compression/movement_sparsity/importance_threshold": -4.141355432848992e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177471393444875, "compression/movement_sparsity/model_sparsity": 0.8862197017875707, "compression_loss": 105.25509643554688, "distillation_loss": 4.539627552032471, "epoch": 4.46, "learning_rate": 3.0769230769230774e-05, "loss": 109.0643, "step": 5278, "task_loss": 1.9541809558868408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942051371087235, "compression/movement_sparsity/importance_threshold": -4.121942860927687e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177970419860436, "compression/movement_sparsity/model_sparsity": 0.8862678901198708, "compression_loss": 105.25739288330078, "distillation_loss": 5.308937072753906, "epoch": 4.46, "learning_rate": 3.0764534610688454e-05, "loss": 109.6029, "step": 5279, "task_loss": 2.5947113037109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942323429932669, "compression/movement_sparsity/importance_threshold": -4.102591048180289e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177407837631376, "compression/movement_sparsity/model_sparsity": 0.8862135645399921, "compression_loss": 105.2596435546875, "distillation_loss": 5.904548645019531, "epoch": 4.46, "learning_rate": 3.075983845214615e-05, "loss": 109.6333, "step": 5280, "task_loss": 2.9296178817749023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942594635929758, "compression/movement_sparsity/importance_threshold": -4.083299899372908e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177359783235803, "compression/movement_sparsity/model_sparsity": 0.886208924182067, "compression_loss": 105.26189422607422, "distillation_loss": 4.934882164001465, "epoch": 4.46, "learning_rate": 3.075514229360383e-05, "loss": 108.8726, "step": 5281, "task_loss": 2.2843029499053955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942864990417352, "compression/movement_sparsity/importance_threshold": -4.064069319271741e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177787264645549, "compression/movement_sparsity/model_sparsity": 0.8862502037928904, "compression_loss": 105.26416778564453, "distillation_loss": 4.420166969299316, "epoch": 4.46, "learning_rate": 3.075044613506152e-05, "loss": 108.9341, "step": 5282, "task_loss": 3.395112991333008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943134494734303, "compression/movement_sparsity/importance_threshold": -4.044899212643071e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177755427117961, "compression/movement_sparsity/model_sparsity": 0.8862471294118333, "compression_loss": 105.26639556884766, "distillation_loss": 3.4558963775634766, "epoch": 4.47, "learning_rate": 3.074574997651921e-05, "loss": 109.0318, "step": 5283, "task_loss": 2.641267776489258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943403150219462, "compression/movement_sparsity/importance_threshold": -4.0257894842530076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917844345159055, "compression/movement_sparsity/model_sparsity": 0.8863135682833677, "compression_loss": 105.26859283447266, "distillation_loss": 4.393041610717773, "epoch": 4.47, "learning_rate": 3.074105381797689e-05, "loss": 109.6285, "step": 5284, "task_loss": 2.3015849590301514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943670958211681, "compression/movement_sparsity/importance_threshold": -4.006740038867661e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178445955665754, "compression/movement_sparsity/model_sparsity": 0.8863138100886194, "compression_loss": 105.27082824707031, "distillation_loss": 3.4115312099456787, "epoch": 4.47, "learning_rate": 3.0736357659434586e-05, "loss": 109.6019, "step": 5285, "task_loss": 1.7998409271240234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943937920049812, "compression/movement_sparsity/importance_threshold": -3.987750781253314e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9179375683016322, "compression/movement_sparsity/model_sparsity": 0.8864035889242095, "compression_loss": 105.27304077148438, "distillation_loss": 4.0389251708984375, "epoch": 4.47, "learning_rate": 3.073166150089227e-05, "loss": 109.6249, "step": 5286, "task_loss": 2.6891043186187744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944204037072706, "compression/movement_sparsity/importance_threshold": -3.968821616176076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9179314631278026, "compression/movement_sparsity/model_sparsity": 0.8863976934818827, "compression_loss": 105.27515411376953, "distillation_loss": 2.9931020736694336, "epoch": 4.47, "learning_rate": 3.072696534234996e-05, "loss": 109.2454, "step": 5287, "task_loss": 1.5513181686401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944469310619213, "compression/movement_sparsity/importance_threshold": -3.9499524484022316e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9179617028169272, "compression/movement_sparsity/model_sparsity": 0.8864268943446576, "compression_loss": 105.2773666381836, "distillation_loss": 4.25789737701416, "epoch": 4.47, "learning_rate": 3.0722269183807645e-05, "loss": 109.1417, "step": 5288, "task_loss": 2.3148086071014404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944733742028186, "compression/movement_sparsity/importance_threshold": -3.931143182697976e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180307198992036, "compression/movement_sparsity/model_sparsity": 0.8864935404778365, "compression_loss": 105.27953338623047, "distillation_loss": 3.744743824005127, "epoch": 4.47, "learning_rate": 3.071757302526533e-05, "loss": 109.0038, "step": 5289, "task_loss": 1.9224216938018799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944997332638476, "compression/movement_sparsity/importance_threshold": -3.9123937238293334e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917999824380859, "compression/movement_sparsity/model_sparsity": 0.8864637063155928, "compression_loss": 105.28166198730469, "distillation_loss": 4.558573246002197, "epoch": 4.47, "learning_rate": 3.0712876866723024e-05, "loss": 109.5033, "step": 5290, "task_loss": 1.9534096717834473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945260083788934, "compression/movement_sparsity/importance_threshold": -3.893703976562673e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918055367153707, "compression/movement_sparsity/model_sparsity": 0.8865173410233237, "compression_loss": 105.28378295898438, "distillation_loss": 3.9675166606903076, "epoch": 4.47, "learning_rate": 3.070818070818071e-05, "loss": 110.2079, "step": 5291, "task_loss": 2.356440305709839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945521996818412, "compression/movement_sparsity/importance_threshold": -3.875073845664018e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180477476105876, "compression/movement_sparsity/model_sparsity": 0.886509983234951, "compression_loss": 105.28594970703125, "distillation_loss": 3.8889307975769043, "epoch": 4.47, "learning_rate": 3.07034845496384e-05, "loss": 110.1017, "step": 5292, "task_loss": 2.4948644638061523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945783073065761, "compression/movement_sparsity/importance_threshold": -3.856503235899651e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180320673301464, "compression/movement_sparsity/model_sparsity": 0.8864948416203812, "compression_loss": 105.2880630493164, "distillation_loss": 4.046431064605713, "epoch": 4.47, "learning_rate": 3.069878839109608e-05, "loss": 109.1274, "step": 5293, "task_loss": 1.6626354455947876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9946043313869832, "compression/movement_sparsity/importance_threshold": -3.83799205203577e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181091928464152, "compression/movement_sparsity/model_sparsity": 0.8865693176379003, "compression_loss": 105.2901611328125, "distillation_loss": 6.036298751831055, "epoch": 4.47, "learning_rate": 3.0694092232553776e-05, "loss": 110.235, "step": 5294, "task_loss": 2.3415167331695557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9946302720569479, "compression/movement_sparsity/importance_threshold": -3.8195401988384836e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181414596440378, "compression/movement_sparsity/model_sparsity": 0.8866004759717603, "compression_loss": 105.29228210449219, "distillation_loss": 3.9292404651641846, "epoch": 4.48, "learning_rate": 3.068939607401146e-05, "loss": 109.5753, "step": 5295, "task_loss": 2.484708070755005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994656129450355, "compression/movement_sparsity/importance_threshold": -3.801147581073989e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181539203992173, "compression/movement_sparsity/model_sparsity": 0.8866125086616656, "compression_loss": 105.2943115234375, "distillation_loss": 4.354944229125977, "epoch": 4.48, "learning_rate": 3.068469991546914e-05, "loss": 108.9826, "step": 5296, "task_loss": 2.367419481277466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9946819037010897, "compression/movement_sparsity/importance_threshold": -3.7828141035086554e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181450249701609, "compression/movement_sparsity/model_sparsity": 0.8866039188179629, "compression_loss": 105.29634857177734, "distillation_loss": 4.944209575653076, "epoch": 4.48, "learning_rate": 3.0680003756926835e-05, "loss": 109.8783, "step": 5297, "task_loss": 2.6911628246307373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947075949430373, "compression/movement_sparsity/importance_threshold": -3.764539670908333e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181266736761694, "compression/movement_sparsity/model_sparsity": 0.8865861979473751, "compression_loss": 105.29842376708984, "distillation_loss": 3.6977570056915283, "epoch": 4.48, "learning_rate": 3.067530759838452e-05, "loss": 109.1387, "step": 5298, "task_loss": 2.3426637649536133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947332033100829, "compression/movement_sparsity/importance_threshold": -3.7463241880394786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181759085643378, "compression/movement_sparsity/model_sparsity": 0.8866337414656708, "compression_loss": 105.30049133300781, "distillation_loss": 4.433216094970703, "epoch": 4.48, "learning_rate": 3.0670611439842215e-05, "loss": 109.7155, "step": 5299, "task_loss": 2.629582166671753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947587289361115, "compression/movement_sparsity/importance_threshold": -3.7281675596682017e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180908057799207, "compression/movement_sparsity/model_sparsity": 0.8865515622237051, "compression_loss": 105.30253601074219, "distillation_loss": 5.56558895111084, "epoch": 4.48, "learning_rate": 3.06659152812999e-05, "loss": 109.3461, "step": 5300, "task_loss": 2.0907092094421387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947841719550083, "compression/movement_sparsity/importance_threshold": -3.710069690560699e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180433714410652, "compression/movement_sparsity/model_sparsity": 0.8865057574003145, "compression_loss": 105.30455780029297, "distillation_loss": 3.334319829940796, "epoch": 4.48, "learning_rate": 3.066121912275759e-05, "loss": 109.3838, "step": 5301, "task_loss": 2.2673556804656982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948095325006586, "compression/movement_sparsity/importance_threshold": -3.69203048548308e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180451242937077, "compression/movement_sparsity/model_sparsity": 0.8865074500370762, "compression_loss": 105.30664825439453, "distillation_loss": 4.18484354019165, "epoch": 4.48, "learning_rate": 3.0656522964215274e-05, "loss": 109.3525, "step": 5302, "task_loss": 2.156930446624756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948348107069473, "compression/movement_sparsity/importance_threshold": -3.674049849201628e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180215979109622, "compression/movement_sparsity/model_sparsity": 0.8864847318579536, "compression_loss": 105.30870056152344, "distillation_loss": 4.740503311157227, "epoch": 4.48, "learning_rate": 3.065182680567296e-05, "loss": 109.093, "step": 5303, "task_loss": 3.376615047454834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948600067077598, "compression/movement_sparsity/importance_threshold": -3.656127686482453e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918036896618039, "compression/movement_sparsity/model_sparsity": 0.886499505007378, "compression_loss": 105.31076049804688, "distillation_loss": 5.265340805053711, "epoch": 4.48, "learning_rate": 3.064713064713065e-05, "loss": 110.2877, "step": 5304, "task_loss": 2.794856548309326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994885120636981, "compression/movement_sparsity/importance_threshold": -3.638263902091838e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181270194770308, "compression/movement_sparsity/model_sparsity": 0.8865865318689131, "compression_loss": 105.31280517578125, "distillation_loss": 3.8792552947998047, "epoch": 4.48, "learning_rate": 3.064243448858833e-05, "loss": 109.0093, "step": 5305, "task_loss": 2.1278209686279297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9949101526284962, "compression/movement_sparsity/importance_threshold": -3.6204584007957195e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918179473890461, "compression/movement_sparsity/model_sparsity": 0.8866371843118733, "compression_loss": 105.31489562988281, "distillation_loss": 4.728670120239258, "epoch": 4.48, "learning_rate": 3.0637738330046026e-05, "loss": 109.6011, "step": 5306, "task_loss": 2.3587446212768555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9949351028161905, "compression/movement_sparsity/importance_threshold": -3.602711087360641e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9182052181683868, "compression/movement_sparsity/model_sparsity": 0.8866620441946538, "compression_loss": 105.31700134277344, "distillation_loss": 4.091060161590576, "epoch": 4.49, "learning_rate": 3.063304217150371e-05, "loss": 109.4389, "step": 5307, "task_loss": 1.8159856796264648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994959971333949, "compression/movement_sparsity/importance_threshold": -3.585021866552539e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181479225428965, "compression/movement_sparsity/model_sparsity": 0.886606716850161, "compression_loss": 105.31908416748047, "distillation_loss": 3.0357513427734375, "epoch": 4.49, "learning_rate": 3.06283460129614e-05, "loss": 109.396, "step": 5308, "task_loss": 1.55964994430542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9949847583156568, "compression/movement_sparsity/importance_threshold": -3.567390643137696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181575572703462, "compression/movement_sparsity/model_sparsity": 0.8866160205950829, "compression_loss": 105.32112884521484, "distillation_loss": 4.268553733825684, "epoch": 4.49, "learning_rate": 3.0623649854419085e-05, "loss": 109.4412, "step": 5309, "task_loss": 3.011606454849243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950094638951992, "compression/movement_sparsity/importance_threshold": -3.5498173218822225e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181656060835005, "compression/movement_sparsity/model_sparsity": 0.8866237929067443, "compression_loss": 105.32321166992188, "distillation_loss": 3.4559450149536133, "epoch": 4.49, "learning_rate": 3.061895369587677e-05, "loss": 109.6223, "step": 5310, "task_loss": 1.9942660331726074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950340882064611, "compression/movement_sparsity/importance_threshold": -3.5323018075524015e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181902652621714, "compression/movement_sparsity/model_sparsity": 0.8866476049667674, "compression_loss": 105.32522583007812, "distillation_loss": 4.474799156188965, "epoch": 4.49, "learning_rate": 3.0614257537334464e-05, "loss": 110.5368, "step": 5311, "task_loss": 2.168689727783203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950586313833278, "compression/movement_sparsity/importance_threshold": -3.5148440049144296e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181716277881565, "compression/movement_sparsity/model_sparsity": 0.8866296077473205, "compression_loss": 105.32728576660156, "distillation_loss": 4.841857433319092, "epoch": 4.49, "learning_rate": 3.060956137879215e-05, "loss": 108.8597, "step": 5312, "task_loss": 2.171424150466919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950830935596846, "compression/movement_sparsity/importance_threshold": -3.497443818734243e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9182183824494567, "compression/movement_sparsity/model_sparsity": 0.886674756242171, "compression_loss": 105.32927703857422, "distillation_loss": 4.429961204528809, "epoch": 4.49, "learning_rate": 3.060486522024984e-05, "loss": 109.2699, "step": 5313, "task_loss": 1.5774646997451782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951074748694163, "compression/movement_sparsity/importance_threshold": -3.480101153778385e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181279257137711, "compression/movement_sparsity/model_sparsity": 0.8865874069736335, "compression_loss": 105.33125305175781, "distillation_loss": 4.438265800476074, "epoch": 4.49, "learning_rate": 3.060016906170752e-05, "loss": 110.1583, "step": 5314, "task_loss": 3.3703062534332275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951317754464083, "compression/movement_sparsity/importance_threshold": -3.4628159148127925e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181164904370084, "compression/movement_sparsity/model_sparsity": 0.8865763645338065, "compression_loss": 105.33319854736328, "distillation_loss": 3.923837184906006, "epoch": 4.49, "learning_rate": 3.059547290316521e-05, "loss": 109.9991, "step": 5315, "task_loss": 2.6481125354766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951559954245457, "compression/movement_sparsity/importance_threshold": -3.4455880066036615e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181173012804076, "compression/movement_sparsity/model_sparsity": 0.8865771475222406, "compression_loss": 105.33513641357422, "distillation_loss": 4.4405412673950195, "epoch": 4.49, "learning_rate": 3.05907767446229e-05, "loss": 109.9219, "step": 5316, "task_loss": 2.0204477310180664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951801349377135, "compression/movement_sparsity/importance_threshold": -3.428417333917362e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181109933957282, "compression/movement_sparsity/model_sparsity": 0.8865710563328053, "compression_loss": 105.33716583251953, "distillation_loss": 3.0069124698638916, "epoch": 4.49, "learning_rate": 3.058608058608059e-05, "loss": 108.8924, "step": 5317, "task_loss": 2.235471487045288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952041941197969, "compression/movement_sparsity/importance_threshold": -3.4113038015199174e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180988068964043, "compression/movement_sparsity/model_sparsity": 0.8865592884772232, "compression_loss": 105.33909606933594, "distillation_loss": 4.384404182434082, "epoch": 4.5, "learning_rate": 3.0581384427538275e-05, "loss": 109.3676, "step": 5318, "task_loss": 2.571617603302002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952281731046811, "compression/movement_sparsity/importance_threshold": -3.394247314177524e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181304536373099, "compression/movement_sparsity/model_sparsity": 0.886589848055222, "compression_loss": 105.34110260009766, "distillation_loss": 7.034951210021973, "epoch": 4.5, "learning_rate": 3.057668826899596e-05, "loss": 110.3809, "step": 5319, "task_loss": 3.3091139793395996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952520720262512, "compression/movement_sparsity/importance_threshold": -3.377247776656465e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181166216028523, "compression/movement_sparsity/model_sparsity": 0.8865764911937003, "compression_loss": 105.34300231933594, "distillation_loss": 3.5580391883850098, "epoch": 4.5, "learning_rate": 3.057199211045365e-05, "loss": 109.1715, "step": 5320, "task_loss": 2.3917596340179443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952758910183924, "compression/movement_sparsity/importance_threshold": -3.36030509372285e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9181873915377712, "compression/movement_sparsity/model_sparsity": 0.8866448299636409, "compression_loss": 105.34485626220703, "distillation_loss": 3.221957206726074, "epoch": 4.5, "learning_rate": 3.056729595191134e-05, "loss": 109.5872, "step": 5321, "task_loss": 2.6634035110473633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952996302149898, "compression/movement_sparsity/importance_threshold": -3.343419170142789e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9182509831237733, "compression/movement_sparsity/model_sparsity": 0.8867062369830332, "compression_loss": 105.3468017578125, "distillation_loss": 5.677908420562744, "epoch": 4.5, "learning_rate": 3.056259979336902e-05, "loss": 109.4169, "step": 5322, "task_loss": 2.2937421798706055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953232897499285, "compression/movement_sparsity/importance_threshold": -3.326589910682652e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183014223528729, "compression/movement_sparsity/model_sparsity": 0.8867549434694441, "compression_loss": 105.34868621826172, "distillation_loss": 2.7935712337493896, "epoch": 4.5, "learning_rate": 3.0557903634826714e-05, "loss": 108.9663, "step": 5323, "task_loss": 1.3704078197479248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953468697570936, "compression/movement_sparsity/importance_threshold": -3.309817220108549e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183479146824852, "compression/movement_sparsity/model_sparsity": 0.886799838644507, "compression_loss": 105.35063934326172, "distillation_loss": 3.717545747756958, "epoch": 4.5, "learning_rate": 3.05532074762844e-05, "loss": 109.7336, "step": 5324, "task_loss": 1.8508825302124023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953703703703703, "compression/movement_sparsity/importance_threshold": -3.2931010031865895e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183573109265822, "compression/movement_sparsity/model_sparsity": 0.8868089120987132, "compression_loss": 105.35248565673828, "distillation_loss": 3.7311649322509766, "epoch": 4.5, "learning_rate": 3.054851131774209e-05, "loss": 108.8008, "step": 5325, "task_loss": 1.7434366941452026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.995393791723644, "compression/movement_sparsity/importance_threshold": -3.27644116468297e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9184410066592184, "compression/movement_sparsity/model_sparsity": 0.886889732625455, "compression_loss": 105.35436248779297, "distillation_loss": 4.705486297607422, "epoch": 4.5, "learning_rate": 3.054381515919977e-05, "loss": 109.6567, "step": 5326, "task_loss": 2.825105667114258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954171339507993, "compression/movement_sparsity/importance_threshold": -3.259837609363974e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185189907155569, "compression/movement_sparsity/model_sparsity": 0.8869650376895513, "compression_loss": 105.35624694824219, "distillation_loss": 3.264998197555542, "epoch": 4.5, "learning_rate": 3.053911900065746e-05, "loss": 109.4763, "step": 5327, "task_loss": 1.3692017793655396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954403971857217, "compression/movement_sparsity/importance_threshold": -3.243290241995711e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186361456625792, "compression/movement_sparsity/model_sparsity": 0.8870781680037326, "compression_loss": 105.3581314086914, "distillation_loss": 5.252989292144775, "epoch": 4.5, "learning_rate": 3.053442284211515e-05, "loss": 109.1953, "step": 5328, "task_loss": 2.7826998233795166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954635815622964, "compression/movement_sparsity/importance_threshold": -3.226798967344378e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186071699352241, "compression/movement_sparsity/model_sparsity": 0.8870501876817519, "compression_loss": 105.35990142822266, "distillation_loss": 2.7359261512756348, "epoch": 4.5, "learning_rate": 3.052972668357284e-05, "loss": 108.9186, "step": 5329, "task_loss": 1.3286482095718384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954866872144084, "compression/movement_sparsity/importance_threshold": -3.2103636901761705e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185718863231895, "compression/movement_sparsity/model_sparsity": 0.8870161161703358, "compression_loss": 105.36177825927734, "distillation_loss": 5.045273780822754, "epoch": 4.51, "learning_rate": 3.052503052503053e-05, "loss": 109.7731, "step": 5330, "task_loss": 2.665040969848633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955097142759428, "compression/movement_sparsity/importance_threshold": -3.193984315257199e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185730310432826, "compression/movement_sparsity/model_sparsity": 0.8870172215657721, "compression_loss": 105.36363983154297, "distillation_loss": 5.52754020690918, "epoch": 4.51, "learning_rate": 3.052033436648821e-05, "loss": 110.4599, "step": 5331, "task_loss": 3.018996238708496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955326628807848, "compression/movement_sparsity/importance_threshold": -3.177660747353834e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185933021282636, "compression/movement_sparsity/model_sparsity": 0.8870367962766229, "compression_loss": 105.36544036865234, "distillation_loss": 4.543115139007568, "epoch": 4.51, "learning_rate": 3.05156382079459e-05, "loss": 109.1068, "step": 5332, "task_loss": 2.4106526374816895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955555331628195, "compression/movement_sparsity/importance_threshold": -3.1613928912320975e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185688695087777, "compression/movement_sparsity/model_sparsity": 0.8870132029927799, "compression_loss": 105.36724090576172, "distillation_loss": 4.779504776000977, "epoch": 4.51, "learning_rate": 3.051094204940359e-05, "loss": 109.0978, "step": 5333, "task_loss": 3.3165011405944824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955783252559322, "compression/movement_sparsity/importance_threshold": -3.1451806516581866e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185903807071928, "compression/movement_sparsity/model_sparsity": 0.8870339752153532, "compression_loss": 105.36903381347656, "distillation_loss": 3.0653727054595947, "epoch": 4.51, "learning_rate": 3.050624589086128e-05, "loss": 109.279, "step": 5334, "task_loss": 1.5473973751068115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956010392940078, "compression/movement_sparsity/importance_threshold": -3.1290239333983845e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186622953622047, "compression/movement_sparsity/model_sparsity": 0.8871034193807301, "compression_loss": 105.37081909179688, "distillation_loss": 4.333202838897705, "epoch": 4.51, "learning_rate": 3.0501549732318963e-05, "loss": 109.1158, "step": 5335, "task_loss": 3.112011432647705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956236754109316, "compression/movement_sparsity/importance_threshold": -3.112922641218801e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9187658448339545, "compression/movement_sparsity/model_sparsity": 0.8872034116095698, "compression_loss": 105.37256622314453, "distillation_loss": 3.8698890209198, "epoch": 4.51, "learning_rate": 3.049685357377665e-05, "loss": 109.8364, "step": 5336, "task_loss": 1.3663413524627686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956462337405887, "compression/movement_sparsity/importance_threshold": -3.0968766798855456e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188005680101101, "compression/movement_sparsity/model_sparsity": 0.8872369419378036, "compression_loss": 105.37434387207031, "distillation_loss": 4.039062023162842, "epoch": 4.51, "learning_rate": 3.049215741523434e-05, "loss": 109.3154, "step": 5337, "task_loss": 2.0132956504821777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956687144168642, "compression/movement_sparsity/importance_threshold": -3.0808859541649886e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188135653528332, "compression/movement_sparsity/model_sparsity": 0.8872494927818196, "compression_loss": 105.37618255615234, "distillation_loss": 3.3075942993164062, "epoch": 4.51, "learning_rate": 3.048746125669203e-05, "loss": 108.7045, "step": 5338, "task_loss": 3.194579839706421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956911175736434, "compression/movement_sparsity/importance_threshold": -3.064950368823153e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188177865081764, "compression/movement_sparsity/model_sparsity": 0.8872535689274909, "compression_loss": 105.37787628173828, "distillation_loss": 4.28110408782959, "epoch": 4.51, "learning_rate": 3.0482765098149712e-05, "loss": 109.2528, "step": 5339, "task_loss": 1.8469855785369873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957134433448112, "compression/movement_sparsity/importance_threshold": -3.0490698286264085e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9187875468190518, "compression/movement_sparsity/model_sparsity": 0.8872243680647159, "compression_loss": 105.37962341308594, "distillation_loss": 5.415424346923828, "epoch": 4.51, "learning_rate": 3.0478068939607402e-05, "loss": 109.5962, "step": 5340, "task_loss": 2.6255056858062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957356918642528, "compression/movement_sparsity/importance_threshold": -3.0332442383407786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188337648928084, "compression/movement_sparsity/model_sparsity": 0.8872689984054556, "compression_loss": 105.38136291503906, "distillation_loss": 4.996483325958252, "epoch": 4.51, "learning_rate": 3.047337278106509e-05, "loss": 109.7376, "step": 5341, "task_loss": 3.0266969203948975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957578632658536, "compression/movement_sparsity/importance_threshold": -3.0174735027323728e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188448185962068, "compression/movement_sparsity/model_sparsity": 0.8872796723801372, "compression_loss": 105.38309478759766, "distillation_loss": 4.450462818145752, "epoch": 4.52, "learning_rate": 3.0468676622522778e-05, "loss": 109.2135, "step": 5342, "task_loss": 2.2664542198181152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957799576834985, "compression/movement_sparsity/importance_threshold": -3.001757526567561e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188636707052391, "compression/movement_sparsity/model_sparsity": 0.8872978768612283, "compression_loss": 105.38477325439453, "distillation_loss": 2.8786168098449707, "epoch": 4.52, "learning_rate": 3.0463980463980464e-05, "loss": 109.0707, "step": 5343, "task_loss": 1.6680183410644531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958019752510725, "compression/movement_sparsity/importance_threshold": -2.98609621461254e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189447788934983, "compression/movement_sparsity/model_sparsity": 0.8873761987337028, "compression_loss": 105.38652038574219, "distillation_loss": 4.034934043884277, "epoch": 4.52, "learning_rate": 3.045928430543815e-05, "loss": 109.153, "step": 5344, "task_loss": 2.637389659881592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958239161024611, "compression/movement_sparsity/importance_threshold": -2.970489471633333e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189390075963625, "compression/movement_sparsity/model_sparsity": 0.8873706256983782, "compression_loss": 105.38816833496094, "distillation_loss": 4.02386999130249, "epoch": 4.52, "learning_rate": 3.045458814689584e-05, "loss": 109.5448, "step": 5345, "task_loss": 2.974600076675415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958457803715492, "compression/movement_sparsity/importance_threshold": -2.9549372023962225e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190395521778681, "compression/movement_sparsity/model_sparsity": 0.8874677162641977, "compression_loss": 105.38981628417969, "distillation_loss": 4.637992858886719, "epoch": 4.52, "learning_rate": 3.044989198835353e-05, "loss": 109.7976, "step": 5346, "task_loss": 2.58855938911438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958675681922221, "compression/movement_sparsity/importance_threshold": -2.9394393116674057e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919063722465666, "compression/movement_sparsity/model_sparsity": 0.8874910562282533, "compression_loss": 105.39144134521484, "distillation_loss": 4.718484401702881, "epoch": 4.52, "learning_rate": 3.044519582981122e-05, "loss": 109.8187, "step": 5347, "task_loss": 2.3733530044555664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958892796983647, "compression/movement_sparsity/importance_threshold": -2.923995704213079e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190461939392414, "compression/movement_sparsity/model_sparsity": 0.8874741298606353, "compression_loss": 105.39311218261719, "distillation_loss": 2.6858019828796387, "epoch": 4.52, "learning_rate": 3.0440499671268903e-05, "loss": 108.9897, "step": 5348, "task_loss": 2.036860466003418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959109150238624, "compression/movement_sparsity/importance_threshold": -2.908606284799265e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190032430874169, "compression/movement_sparsity/model_sparsity": 0.8874326545027034, "compression_loss": 105.39469146728516, "distillation_loss": 4.4612321853637695, "epoch": 4.52, "learning_rate": 3.043580351272659e-05, "loss": 109.3069, "step": 5349, "task_loss": 2.4763576984405518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959324743026002, "compression/movement_sparsity/importance_threshold": -2.8932709581923344e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190355098850396, "compression/movement_sparsity/model_sparsity": 0.8874638128365634, "compression_loss": 105.39630889892578, "distillation_loss": 4.3861188888549805, "epoch": 4.52, "learning_rate": 3.043110735418428e-05, "loss": 109.3037, "step": 5350, "task_loss": 4.0792131423950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959539576684633, "compression/movement_sparsity/importance_threshold": -2.8779896291584832e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190729040747456, "compression/movement_sparsity/model_sparsity": 0.8874999224208151, "compression_loss": 105.39787292480469, "distillation_loss": 3.7058262825012207, "epoch": 4.52, "learning_rate": 3.042641119564197e-05, "loss": 108.8431, "step": 5351, "task_loss": 1.633628010749817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959753652553368, "compression/movement_sparsity/importance_threshold": -2.8627622024637346e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191102982644517, "compression/movement_sparsity/model_sparsity": 0.8875360320050668, "compression_loss": 105.39942169189453, "distillation_loss": 3.94285249710083, "epoch": 4.52, "learning_rate": 3.042171503709965e-05, "loss": 109.7105, "step": 5352, "task_loss": 1.8079195022583008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959966971971058, "compression/movement_sparsity/importance_threshold": -2.847588582874372e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191382365892226, "compression/movement_sparsity/model_sparsity": 0.8875630105624334, "compression_loss": 105.40103912353516, "distillation_loss": 3.7419562339782715, "epoch": 4.52, "learning_rate": 3.041701887855734e-05, "loss": 109.5708, "step": 5353, "task_loss": 2.239185333251953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960179536276556, "compression/movement_sparsity/importance_threshold": -2.8324686751566783e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191536903104787, "compression/movement_sparsity/model_sparsity": 0.8875779334008231, "compression_loss": 105.402587890625, "distillation_loss": 4.016082763671875, "epoch": 4.53, "learning_rate": 3.041232272001503e-05, "loss": 109.4696, "step": 5354, "task_loss": 3.281085968017578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996039134680871, "compression/movement_sparsity/importance_threshold": -2.8174023840766768e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192329264044189, "compression/movement_sparsity/model_sparsity": 0.8876544474911778, "compression_loss": 105.40415954589844, "distillation_loss": 4.342652320861816, "epoch": 4.53, "learning_rate": 3.0407626561472717e-05, "loss": 109.9811, "step": 5355, "task_loss": 3.2454144954681396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960602404906376, "compression/movement_sparsity/importance_threshold": -2.802389614400564e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192659444246026, "compression/movement_sparsity/model_sparsity": 0.8876863312407929, "compression_loss": 105.40576934814453, "distillation_loss": 4.130334854125977, "epoch": 4.53, "learning_rate": 3.04029304029304e-05, "loss": 109.4635, "step": 5356, "task_loss": 2.1682097911834717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960812711908402, "compression/movement_sparsity/importance_threshold": -2.787430270894623e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192836875860448, "compression/movement_sparsity/model_sparsity": 0.8877034648700551, "compression_loss": 105.40738677978516, "distillation_loss": 3.2998228073120117, "epoch": 4.53, "learning_rate": 3.039823424438809e-05, "loss": 108.8949, "step": 5357, "task_loss": 1.881295919418335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961022269153641, "compression/movement_sparsity/importance_threshold": -2.772524258324964e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193170275587547, "compression/movement_sparsity/model_sparsity": 0.8877356595121367, "compression_loss": 105.4089584350586, "distillation_loss": 4.626972198486328, "epoch": 4.53, "learning_rate": 3.039353808584578e-05, "loss": 109.5394, "step": 5358, "task_loss": 2.758014678955078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961231077980944, "compression/movement_sparsity/importance_threshold": -2.757671481457783e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193401127472977, "compression/movement_sparsity/model_sparsity": 0.887757951653435, "compression_loss": 105.41053771972656, "distillation_loss": 4.6565728187561035, "epoch": 4.53, "learning_rate": 3.038884192730347e-05, "loss": 109.7672, "step": 5359, "task_loss": 2.134526252746582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961439139729162, "compression/movement_sparsity/importance_threshold": -2.742871845059277e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193494135980537, "compression/movement_sparsity/model_sparsity": 0.8877669329913547, "compression_loss": 105.41211700439453, "distillation_loss": 5.560101509094238, "epoch": 4.53, "learning_rate": 3.0384145768761156e-05, "loss": 109.7551, "step": 5360, "task_loss": 3.860121965408325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961646455737146, "compression/movement_sparsity/importance_threshold": -2.7281252538957287e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193422590974721, "compression/movement_sparsity/model_sparsity": 0.887760024269878, "compression_loss": 105.41368103027344, "distillation_loss": 2.490968942642212, "epoch": 4.53, "learning_rate": 3.0379449610218842e-05, "loss": 109.4305, "step": 5361, "task_loss": 0.9869705438613892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996185302734375, "compression/movement_sparsity/importance_threshold": -2.713431612733075e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194498389378829, "compression/movement_sparsity/model_sparsity": 0.8878639084118163, "compression_loss": 105.41522979736328, "distillation_loss": 5.374977111816406, "epoch": 4.53, "learning_rate": 3.037475345167653e-05, "loss": 110.1113, "step": 5362, "task_loss": 2.9671249389648438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962058855887823, "compression/movement_sparsity/importance_threshold": -2.6987908263376853e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194517587288723, "compression/movement_sparsity/model_sparsity": 0.8878657622520792, "compression_loss": 105.41682434082031, "distillation_loss": 4.170245170593262, "epoch": 4.53, "learning_rate": 3.0370057293134218e-05, "loss": 110.3779, "step": 5363, "task_loss": 2.0200061798095703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962263942708216, "compression/movement_sparsity/importance_threshold": -2.6842027994758433e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194961762533159, "compression/movement_sparsity/model_sparsity": 0.8879086538979138, "compression_loss": 105.41836547851562, "distillation_loss": 4.522043228149414, "epoch": 4.53, "learning_rate": 3.0365361134591908e-05, "loss": 109.337, "step": 5364, "task_loss": 2.7926290035247803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962468289143782, "compression/movement_sparsity/importance_threshold": -2.669667436913485e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195561190440213, "compression/movement_sparsity/model_sparsity": 0.8879665374693531, "compression_loss": 105.4199447631836, "distillation_loss": 3.197877883911133, "epoch": 4.53, "learning_rate": 3.036066497604959e-05, "loss": 108.4784, "step": 5365, "task_loss": 1.9018311500549316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962671896533372, "compression/movement_sparsity/importance_threshold": -2.655184643416894e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195673873824373, "compression/movement_sparsity/model_sparsity": 0.8879774187056789, "compression_loss": 105.4214096069336, "distillation_loss": 4.995384216308594, "epoch": 4.54, "learning_rate": 3.035596881750728e-05, "loss": 109.4951, "step": 5366, "task_loss": 2.808417558670044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962874766215837, "compression/movement_sparsity/importance_threshold": -2.6407543237523534e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196205214734227, "compression/movement_sparsity/model_sparsity": 0.8880287274771794, "compression_loss": 105.42286682128906, "distillation_loss": 3.973822593688965, "epoch": 4.54, "learning_rate": 3.035127265896497e-05, "loss": 109.3583, "step": 5367, "task_loss": 2.1965482234954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963076899530029, "compression/movement_sparsity/importance_threshold": -2.626376382685886e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195946818021558, "compression/movement_sparsity/model_sparsity": 0.8880037754781126, "compression_loss": 105.42440795898438, "distillation_loss": 4.647959232330322, "epoch": 4.54, "learning_rate": 3.0346576500422657e-05, "loss": 109.4212, "step": 5368, "task_loss": 2.5470311641693115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963278297814798, "compression/movement_sparsity/importance_threshold": -2.6120507249837756e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195652529564304, "compression/movement_sparsity/model_sparsity": 0.8879753576037717, "compression_loss": 105.42584991455078, "distillation_loss": 4.414793014526367, "epoch": 4.54, "learning_rate": 3.034188034188034e-05, "loss": 109.9877, "step": 5369, "task_loss": 2.0297014713287354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963478962408997, "compression/movement_sparsity/importance_threshold": -2.597777255412305e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195370761483069, "compression/movement_sparsity/model_sparsity": 0.8879481487556892, "compression_loss": 105.4273452758789, "distillation_loss": 4.196117877960205, "epoch": 4.54, "learning_rate": 3.033718418333803e-05, "loss": 109.8205, "step": 5370, "task_loss": 2.0168745517730713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963678894651476, "compression/movement_sparsity/importance_threshold": -2.5835558787374972e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196004530992915, "compression/movement_sparsity/model_sparsity": 0.8880093485134372, "compression_loss": 105.4288101196289, "distillation_loss": 3.1296567916870117, "epoch": 4.54, "learning_rate": 3.033248802479572e-05, "loss": 108.7429, "step": 5371, "task_loss": 2.4814555644989014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963878095881089, "compression/movement_sparsity/importance_threshold": -2.5693864997254623e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196281648648772, "compression/movement_sparsity/model_sparsity": 0.8880361082946236, "compression_loss": 105.43030548095703, "distillation_loss": 4.021720886230469, "epoch": 4.54, "learning_rate": 3.032779186625341e-05, "loss": 109.2431, "step": 5372, "task_loss": 2.1261210441589355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964076567436684, "compression/movement_sparsity/importance_threshold": -2.555269023142657e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196529552093922, "compression/movement_sparsity/model_sparsity": 0.8880600470145406, "compression_loss": 105.43177795410156, "distillation_loss": 4.161541938781738, "epoch": 4.54, "learning_rate": 3.0323095707711092e-05, "loss": 109.3089, "step": 5373, "task_loss": 3.5313022136688232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964274310657115, "compression/movement_sparsity/importance_threshold": -2.541203353755017e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196915418158619, "compression/movement_sparsity/model_sparsity": 0.8880973080523716, "compression_loss": 105.43323516845703, "distillation_loss": 3.74269700050354, "epoch": 4.54, "learning_rate": 3.031839954916878e-05, "loss": 109.4375, "step": 5374, "task_loss": 1.9550038576126099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964471326881232, "compression/movement_sparsity/importance_threshold": -2.5271893963289133e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196638777469467, "compression/movement_sparsity/model_sparsity": 0.8880705943293283, "compression_loss": 105.43470001220703, "distillation_loss": 4.886573791503906, "epoch": 4.54, "learning_rate": 3.0313703390626468e-05, "loss": 109.7193, "step": 5375, "task_loss": 2.0348010063171387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964667617447887, "compression/movement_sparsity/importance_threshold": -2.513227055630455e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196381215448531, "compression/movement_sparsity/model_sparsity": 0.8880457229320121, "compression_loss": 105.43614196777344, "distillation_loss": 4.877065658569336, "epoch": 4.54, "learning_rate": 3.0309007232084158e-05, "loss": 109.6789, "step": 5376, "task_loss": 2.724550724029541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964863183695931, "compression/movement_sparsity/importance_threshold": -2.499316236425752e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196520608968195, "compression/movement_sparsity/model_sparsity": 0.888059183424356, "compression_loss": 105.43753814697266, "distillation_loss": 2.962174892425537, "epoch": 4.54, "learning_rate": 3.0304311073541847e-05, "loss": 108.2253, "step": 5377, "task_loss": 1.6304289102554321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965058026964216, "compression/movement_sparsity/importance_threshold": -2.4854568434810875e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919677554767225, "compression/movement_sparsity/model_sparsity": 0.8880838015018847, "compression_loss": 105.43891143798828, "distillation_loss": 3.2733709812164307, "epoch": 4.55, "learning_rate": 3.029961491499953e-05, "loss": 109.2272, "step": 5378, "task_loss": 1.937685489654541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965252148591592, "compression/movement_sparsity/importance_threshold": -2.4716487815626582e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197160459803535, "compression/movement_sparsity/model_sparsity": 0.8881209704234294, "compression_loss": 105.44027709960938, "distillation_loss": 5.3945112228393555, "epoch": 4.55, "learning_rate": 3.029491875645722e-05, "loss": 110.0225, "step": 5379, "task_loss": 4.21557092666626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965445549916914, "compression/movement_sparsity/importance_threshold": -2.457891955436487e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197469653470334, "compression/movement_sparsity/model_sparsity": 0.8881508276147447, "compression_loss": 105.44163513183594, "distillation_loss": 5.168712615966797, "epoch": 4.55, "learning_rate": 3.0290222597914906e-05, "loss": 109.3885, "step": 5380, "task_loss": 3.3234095573425293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965638232279028, "compression/movement_sparsity/importance_threshold": -2.444186269869031e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197942446717096, "compression/movement_sparsity/model_sparsity": 0.8881964827491701, "compression_loss": 105.44296264648438, "distillation_loss": 3.1644959449768066, "epoch": 4.55, "learning_rate": 3.0285526439372596e-05, "loss": 109.5611, "step": 5381, "task_loss": 1.8358170986175537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965830197016791, "compression/movement_sparsity/importance_threshold": -2.4305316296262258e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198043325175295, "compression/movement_sparsity/model_sparsity": 0.8882062240464522, "compression_loss": 105.44437408447266, "distillation_loss": 4.161610126495361, "epoch": 4.55, "learning_rate": 3.028083028083028e-05, "loss": 109.4277, "step": 5382, "task_loss": 2.3596994876861572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966021445469049, "compression/movement_sparsity/importance_threshold": -2.416927939474442e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198266426351762, "compression/movement_sparsity/model_sparsity": 0.8882277677429238, "compression_loss": 105.44561767578125, "distillation_loss": 4.768010139465332, "epoch": 4.55, "learning_rate": 3.027613412228797e-05, "loss": 110.0854, "step": 5383, "task_loss": 3.3670549392700195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966211978974657, "compression/movement_sparsity/importance_threshold": -2.4033751041797023e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198601018495625, "compression/movement_sparsity/model_sparsity": 0.8882600775303633, "compression_loss": 105.44696807861328, "distillation_loss": 3.768191337585449, "epoch": 4.55, "learning_rate": 3.027143796374566e-05, "loss": 109.9373, "step": 5384, "task_loss": 2.1514248847961426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966401798872466, "compression/movement_sparsity/importance_threshold": -2.3898730285082902e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198733257514706, "compression/movement_sparsity/model_sparsity": 0.8882728471505594, "compression_loss": 105.44824981689453, "distillation_loss": 2.6331071853637695, "epoch": 4.55, "learning_rate": 3.0266741805203348e-05, "loss": 108.9098, "step": 5385, "task_loss": 2.4347054958343506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966590906501327, "compression/movement_sparsity/importance_threshold": -2.3764216172263153e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198478080327298, "compression/movement_sparsity/model_sparsity": 0.8882482060439592, "compression_loss": 105.44955444335938, "distillation_loss": 4.853364944458008, "epoch": 4.55, "learning_rate": 3.026204564666103e-05, "loss": 109.8019, "step": 5386, "task_loss": 3.4223883152008057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966779303200091, "compression/movement_sparsity/importance_threshold": -2.3630207751001477e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198117851223019, "compression/movement_sparsity/model_sparsity": 0.8882134206313238, "compression_loss": 105.45082092285156, "distillation_loss": 3.6607606410980225, "epoch": 4.55, "learning_rate": 3.0257349488118718e-05, "loss": 109.9262, "step": 5387, "task_loss": 1.517484426498413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996696699030761, "compression/movement_sparsity/importance_threshold": -2.3496704068957236e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197910489947831, "compression/movement_sparsity/model_sparsity": 0.8881933968535771, "compression_loss": 105.45222473144531, "distillation_loss": 3.2430624961853027, "epoch": 4.55, "learning_rate": 3.0252653329576407e-05, "loss": 109.707, "step": 5388, "task_loss": 1.3744697570800781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967153969162734, "compression/movement_sparsity/importance_threshold": -2.336370417379413e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198292778762237, "compression/movement_sparsity/model_sparsity": 0.8882303124553343, "compression_loss": 105.45350646972656, "distillation_loss": 5.542283058166504, "epoch": 4.56, "learning_rate": 3.0247957171034097e-05, "loss": 109.4225, "step": 5389, "task_loss": 3.090888261795044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967340241104317, "compression/movement_sparsity/importance_threshold": -2.3231207113172392e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198412258921949, "compression/movement_sparsity/model_sparsity": 0.8882418500202005, "compression_loss": 105.45477294921875, "distillation_loss": 5.452942848205566, "epoch": 4.56, "learning_rate": 3.0243261012491787e-05, "loss": 110.0368, "step": 5390, "task_loss": 4.453446865081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967525807471208, "compression/movement_sparsity/importance_threshold": -2.309921193475572e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198456139858848, "compression/movement_sparsity/model_sparsity": 0.8882460873693729, "compression_loss": 105.45606231689453, "distillation_loss": 5.970646858215332, "epoch": 4.56, "learning_rate": 3.023856485394947e-05, "loss": 110.0545, "step": 5391, "task_loss": 3.1278069019317627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996771066960226, "compression/movement_sparsity/importance_threshold": -2.2967717686204342e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198578362577116, "compression/movement_sparsity/model_sparsity": 0.8882578897685623, "compression_loss": 105.45736694335938, "distillation_loss": 3.353882312774658, "epoch": 4.56, "learning_rate": 3.023386869540716e-05, "loss": 108.8303, "step": 5392, "task_loss": 1.5529049634933472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967894828836323, "compression/movement_sparsity/importance_threshold": -2.2836723415181094e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198611511763144, "compression/movement_sparsity/model_sparsity": 0.8882610908095132, "compression_loss": 105.45875549316406, "distillation_loss": 3.5417909622192383, "epoch": 4.56, "learning_rate": 3.0229172536864846e-05, "loss": 108.921, "step": 5393, "task_loss": 2.0601351261138916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996807828651225, "compression/movement_sparsity/importance_threshold": -2.270622816934794e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919842954896502, "compression/movement_sparsity/model_sparsity": 0.8882435196278907, "compression_loss": 105.46001434326172, "distillation_loss": 3.1873574256896973, "epoch": 4.56, "learning_rate": 3.0224476378322535e-05, "loss": 109.3304, "step": 5394, "task_loss": 1.897701621055603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968261043968891, "compression/movement_sparsity/importance_threshold": -2.257623099636598e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198959935941463, "compression/movement_sparsity/model_sparsity": 0.8882947362831048, "compression_loss": 105.46133422851562, "distillation_loss": 6.209989547729492, "epoch": 4.56, "learning_rate": 3.021978021978022e-05, "loss": 110.2861, "step": 5395, "task_loss": 4.050910472869873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968443102545098, "compression/movement_sparsity/importance_threshold": -2.244673094389804e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198626774697718, "compression/movement_sparsity/model_sparsity": 0.8882625646700949, "compression_loss": 105.46267700195312, "distillation_loss": 3.272695779800415, "epoch": 4.56, "learning_rate": 3.0215084061237908e-05, "loss": 109.0854, "step": 5396, "task_loss": 1.8521572351455688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968624463579723, "compression/movement_sparsity/importance_threshold": -2.231772705960436e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198686157052545, "compression/movement_sparsity/model_sparsity": 0.8882682989089206, "compression_loss": 105.46395874023438, "distillation_loss": 3.189450263977051, "epoch": 4.56, "learning_rate": 3.0210387902695598e-05, "loss": 108.9309, "step": 5397, "task_loss": 1.3299849033355713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968805128411616, "compression/movement_sparsity/importance_threshold": -2.2189218391147764e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198243531949901, "compression/movement_sparsity/model_sparsity": 0.8882255569520513, "compression_loss": 105.46531677246094, "distillation_loss": 3.7296829223632812, "epoch": 4.56, "learning_rate": 3.0205691744153288e-05, "loss": 109.0423, "step": 5398, "task_loss": 2.030233860015869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968985098379629, "compression/movement_sparsity/importance_threshold": -2.2061203986191956e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199240153880907, "compression/movement_sparsity/model_sparsity": 0.888321795442222, "compression_loss": 105.46656036376953, "distillation_loss": 4.1854424476623535, "epoch": 4.56, "learning_rate": 3.020099558561097e-05, "loss": 109.3041, "step": 5399, "task_loss": 2.0825064182281494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969164374822614, "compression/movement_sparsity/importance_threshold": -2.193368289239543e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199248739281605, "compression/movement_sparsity/model_sparsity": 0.8883226244887993, "compression_loss": 105.46778869628906, "distillation_loss": 4.4127960205078125, "epoch": 4.56, "learning_rate": 3.0196299427068657e-05, "loss": 109.5405, "step": 5400, "task_loss": 2.174792766571045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969342959079422, "compression/movement_sparsity/importance_threshold": -2.180665415742189e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919918434877637, "compression/movement_sparsity/model_sparsity": 0.8883164066394702, "compression_loss": 105.46905517578125, "distillation_loss": 4.62826681137085, "epoch": 4.57, "learning_rate": 3.0191603268526347e-05, "loss": 109.7825, "step": 5401, "task_loss": 2.281991958618164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969520852488906, "compression/movement_sparsity/importance_threshold": -2.168011682893243e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919910839182853, "compression/movement_sparsity/model_sparsity": 0.888309071880169, "compression_loss": 105.4703140258789, "distillation_loss": 7.456823348999023, "epoch": 4.57, "learning_rate": 3.0186907109984036e-05, "loss": 110.434, "step": 5402, "task_loss": 3.2102606296539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969698056389914, "compression/movement_sparsity/importance_threshold": -2.1554069954589014e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199199850194297, "compression/movement_sparsity/model_sparsity": 0.8883179035291234, "compression_loss": 105.4716567993164, "distillation_loss": 5.305333137512207, "epoch": 4.57, "learning_rate": 3.018221095144172e-05, "loss": 109.2797, "step": 5403, "task_loss": 2.9920554161071777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969874572121299, "compression/movement_sparsity/importance_threshold": -2.1428512582055345e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199139156181031, "compression/movement_sparsity/model_sparsity": 0.8883120426304041, "compression_loss": 105.47283935546875, "distillation_loss": 3.421720504760742, "epoch": 4.57, "learning_rate": 3.017751479289941e-05, "loss": 108.7445, "step": 5404, "task_loss": 1.9557799100875854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970050401021913, "compression/movement_sparsity/importance_threshold": -2.1303443758991653e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199314322203601, "compression/movement_sparsity/model_sparsity": 0.8883289574834863, "compression_loss": 105.47406005859375, "distillation_loss": 4.300114154815674, "epoch": 4.57, "learning_rate": 3.01728186343571e-05, "loss": 109.7552, "step": 5405, "task_loss": 2.8733880519866943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970225544430608, "compression/movement_sparsity/importance_threshold": -2.1178862533058167e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199363688257614, "compression/movement_sparsity/model_sparsity": 0.8883337245013052, "compression_loss": 105.47525024414062, "distillation_loss": 5.649592399597168, "epoch": 4.57, "learning_rate": 3.0168122475814785e-05, "loss": 109.5718, "step": 5406, "task_loss": 2.7065823078155518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970400003686233, "compression/movement_sparsity/importance_threshold": -2.1054767951920322e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199689575759102, "compression/movement_sparsity/model_sparsity": 0.8883651937276317, "compression_loss": 105.47648620605469, "distillation_loss": 4.837080001831055, "epoch": 4.57, "learning_rate": 3.0163426317272475e-05, "loss": 109.9797, "step": 5407, "task_loss": 2.805223226547241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970573780127642, "compression/movement_sparsity/importance_threshold": -2.0931159063236615e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199931874845463, "compression/movement_sparsity/model_sparsity": 0.8883885912643662, "compression_loss": 105.47772216796875, "distillation_loss": 4.582499027252197, "epoch": 4.57, "learning_rate": 3.0158730158730158e-05, "loss": 109.1057, "step": 5408, "task_loss": 2.683685064315796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970746875093683, "compression/movement_sparsity/importance_threshold": -2.0808034914671612e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199901229734638, "compression/movement_sparsity/model_sparsity": 0.8883856320286669, "compression_loss": 105.47892761230469, "distillation_loss": 3.5515904426574707, "epoch": 4.57, "learning_rate": 3.0154034000187847e-05, "loss": 109.3885, "step": 5409, "task_loss": 1.580021619796753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970919289923212, "compression/movement_sparsity/importance_threshold": -2.0685394553885544e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200269209547881, "compression/movement_sparsity/model_sparsity": 0.888421165886129, "compression_loss": 105.48014831542969, "distillation_loss": 2.6108973026275635, "epoch": 4.57, "learning_rate": 3.0149337841645537e-05, "loss": 108.9983, "step": 5410, "task_loss": 0.7483007907867432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971091025955078, "compression/movement_sparsity/importance_threshold": -2.0563237028540376e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200971781504986, "compression/movement_sparsity/model_sparsity": 0.8884890095310304, "compression_loss": 105.4813003540039, "distillation_loss": 3.6873888969421387, "epoch": 4.57, "learning_rate": 3.0144641683103224e-05, "loss": 109.1733, "step": 5411, "task_loss": 1.9699312448501587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971262084528132, "compression/movement_sparsity/importance_threshold": -2.0441561386298074e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201213842107995, "compression/movement_sparsity/model_sparsity": 0.8885123840386933, "compression_loss": 105.48252868652344, "distillation_loss": 4.098386287689209, "epoch": 4.57, "learning_rate": 3.013994552456091e-05, "loss": 109.3336, "step": 5412, "task_loss": 2.672476053237915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971432466981225, "compression/movement_sparsity/importance_threshold": -2.032036667482147e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201021147558999, "compression/movement_sparsity/model_sparsity": 0.8884937765488493, "compression_loss": 105.48379516601562, "distillation_loss": 3.784754514694214, "epoch": 4.58, "learning_rate": 3.0135249366018596e-05, "loss": 110.1095, "step": 5413, "task_loss": 1.983025074005127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997160217465321, "compression/movement_sparsity/importance_threshold": -2.019965194177166e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201065028495898, "compression/movement_sparsity/model_sparsity": 0.8884980138980216, "compression_loss": 105.48502349853516, "distillation_loss": 4.59189510345459, "epoch": 4.58, "learning_rate": 3.0130553207476286e-05, "loss": 109.8022, "step": 5414, "task_loss": 2.4899115562438965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971771208882938, "compression/movement_sparsity/importance_threshold": -2.0079416234809744e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201276801713112, "compression/movement_sparsity/model_sparsity": 0.8885184637135928, "compression_loss": 105.48626708984375, "distillation_loss": 4.182824611663818, "epoch": 4.58, "learning_rate": 3.0125857048933976e-05, "loss": 109.6559, "step": 5415, "task_loss": 1.778895616531372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997193957100926, "compression/movement_sparsity/importance_threshold": -1.9959658601598554e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201205614432326, "compression/movement_sparsity/model_sparsity": 0.8885115895357234, "compression_loss": 105.48754119873047, "distillation_loss": 5.198852062225342, "epoch": 4.58, "learning_rate": 3.012116089039166e-05, "loss": 109.4138, "step": 5416, "task_loss": 2.590548515319824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972107262371027, "compression/movement_sparsity/importance_threshold": -1.9840378089799188e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201447317310305, "compression/movement_sparsity/model_sparsity": 0.888534929499779, "compression_loss": 105.48878479003906, "distillation_loss": 4.827425003051758, "epoch": 4.58, "learning_rate": 3.011646473184935e-05, "loss": 109.941, "step": 5417, "task_loss": 3.883917808532715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972274284307091, "compression/movement_sparsity/importance_threshold": -1.9721573747075345e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201586114621586, "compression/movement_sparsity/model_sparsity": 0.8885483324194439, "compression_loss": 105.48998260498047, "distillation_loss": 2.8815741539001465, "epoch": 4.58, "learning_rate": 3.0111768573307038e-05, "loss": 109.1792, "step": 5418, "task_loss": 1.5894676446914673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972440638156304, "compression/movement_sparsity/importance_threshold": -1.960324462108639e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201676619053942, "compression/movement_sparsity/model_sparsity": 0.8885570719521119, "compression_loss": 105.49120330810547, "distillation_loss": 4.136836051940918, "epoch": 4.58, "learning_rate": 3.0107072414764724e-05, "loss": 109.8738, "step": 5419, "task_loss": 3.011331081390381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972606325257516, "compression/movement_sparsity/importance_threshold": -1.9485389759495153e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202579755510683, "compression/movement_sparsity/model_sparsity": 0.8886442830462197, "compression_loss": 105.49249267578125, "distillation_loss": 2.980266571044922, "epoch": 4.58, "learning_rate": 3.0102376256222407e-05, "loss": 109.1975, "step": 5420, "task_loss": 2.2230541706085205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972771346949579, "compression/movement_sparsity/importance_threshold": -1.93680082099636e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202714856329997, "compression/movement_sparsity/model_sparsity": 0.8886573290152749, "compression_loss": 105.49372100830078, "distillation_loss": 4.4040913581848145, "epoch": 4.58, "learning_rate": 3.0097680097680097e-05, "loss": 109.6005, "step": 5421, "task_loss": 3.0315358638763428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972935704571345, "compression/movement_sparsity/importance_threshold": -1.9251099020154566e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203020115021475, "compression/movement_sparsity/model_sparsity": 0.888686806226909, "compression_loss": 105.49494171142578, "distillation_loss": 5.2285919189453125, "epoch": 4.58, "learning_rate": 3.0092983939137787e-05, "loss": 109.2026, "step": 5422, "task_loss": 1.9199727773666382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973099399461665, "compression/movement_sparsity/importance_threshold": -1.913466123772828e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920332382357116, "compression/movement_sparsity/model_sparsity": 0.8887161337495777, "compression_loss": 105.49616241455078, "distillation_loss": 4.214203357696533, "epoch": 4.58, "learning_rate": 3.0088287780595477e-05, "loss": 109.8767, "step": 5423, "task_loss": 1.5114500522613525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997326243295939, "compression/movement_sparsity/importance_threshold": -1.901869391034757e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203264798941363, "compression/movement_sparsity/model_sparsity": 0.8887104340543593, "compression_loss": 105.49737548828125, "distillation_loss": 5.798648357391357, "epoch": 4.58, "learning_rate": 3.0083591622053163e-05, "loss": 109.715, "step": 5424, "task_loss": 2.6350083351135254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973424806403371, "compression/movement_sparsity/importance_threshold": -1.8903196085673542e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202683376527439, "compression/movement_sparsity/model_sparsity": 0.8886542891778252, "compression_loss": 105.4985580444336, "distillation_loss": 3.739445209503174, "epoch": 4.59, "learning_rate": 3.007889546351085e-05, "loss": 109.1829, "step": 5425, "task_loss": 1.7924686670303345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973586521132461, "compression/movement_sparsity/importance_threshold": -1.8788166811369023e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202576774468774, "compression/movement_sparsity/model_sparsity": 0.8886439951828249, "compression_loss": 105.49974060058594, "distillation_loss": 4.300976753234863, "epoch": 4.59, "learning_rate": 3.0074199304968536e-05, "loss": 109.7821, "step": 5426, "task_loss": 1.8130395412445068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973747578485511, "compression/movement_sparsity/importance_threshold": -1.867360513509511e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202973849251048, "compression/movement_sparsity/model_sparsity": 0.8886823385870207, "compression_loss": 105.50088500976562, "distillation_loss": 5.28981876373291, "epoch": 4.59, "learning_rate": 3.0069503146426225e-05, "loss": 110.2037, "step": 5427, "task_loss": 2.4890551567077637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973907979801372, "compression/movement_sparsity/importance_threshold": -1.8559510104513774e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203398707343914, "compression/movement_sparsity/model_sparsity": 0.8887233648780566, "compression_loss": 105.50204467773438, "distillation_loss": 4.620241165161133, "epoch": 4.59, "learning_rate": 3.0064806987883915e-05, "loss": 108.7154, "step": 5428, "task_loss": 2.078974485397339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974067726418895, "compression/movement_sparsity/importance_threshold": -1.8445880767286975e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203689180067524, "compression/movement_sparsity/model_sparsity": 0.8887514142872521, "compression_loss": 105.50318145751953, "distillation_loss": 4.281425952911377, "epoch": 4.59, "learning_rate": 3.0060110829341598e-05, "loss": 109.667, "step": 5429, "task_loss": 2.783029317855835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974226819676932, "compression/movement_sparsity/importance_threshold": -1.8332716171075812e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203985376391598, "compression/movement_sparsity/model_sparsity": 0.8887800163941658, "compression_loss": 105.50431060791016, "distillation_loss": 3.4398491382598877, "epoch": 4.59, "learning_rate": 3.0055414670799288e-05, "loss": 109.6376, "step": 5430, "task_loss": 2.71669864654541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974385260914334, "compression/movement_sparsity/importance_threshold": -1.8220015363543986e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204549747245804, "compression/movement_sparsity/model_sparsity": 0.8888345146920813, "compression_loss": 105.50544738769531, "distillation_loss": 3.137232542037964, "epoch": 4.59, "learning_rate": 3.0050718512256974e-05, "loss": 109.048, "step": 5431, "task_loss": 1.1035014390945435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974543051469952, "compression/movement_sparsity/importance_threshold": -1.8107777392351726e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204939667527496, "compression/movement_sparsity/model_sparsity": 0.8888721672241294, "compression_loss": 105.50653839111328, "distillation_loss": 4.895781517028809, "epoch": 4.59, "learning_rate": 3.0046022353714664e-05, "loss": 109.8737, "step": 5432, "task_loss": 2.6815848350524902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974700192682638, "compression/movement_sparsity/importance_threshold": -1.7996001305161866e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205062724937498, "compression/movement_sparsity/model_sparsity": 0.8888840502250694, "compression_loss": 105.50765991210938, "distillation_loss": 4.776840686798096, "epoch": 4.59, "learning_rate": 3.0041326195172347e-05, "loss": 109.4416, "step": 5433, "task_loss": 3.030759334564209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974856685891245, "compression/movement_sparsity/importance_threshold": -1.7884686149635502e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920520021059034, "compression/movement_sparsity/model_sparsity": 0.8888973264848405, "compression_loss": 105.50872039794922, "distillation_loss": 3.7616682052612305, "epoch": 4.59, "learning_rate": 3.0036630036630036e-05, "loss": 109.6811, "step": 5434, "task_loss": 1.8508557081222534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975012532434622, "compression/movement_sparsity/importance_threshold": -1.7773830973433734e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920437696605676, "compression/movement_sparsity/model_sparsity": 0.888817830129715, "compression_loss": 105.509765625, "distillation_loss": 6.033050537109375, "epoch": 4.59, "learning_rate": 3.0031933878087726e-05, "loss": 110.3224, "step": 5435, "task_loss": 3.22739315032959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975167733651621, "compression/movement_sparsity/importance_threshold": -1.766343482422026e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204564771697025, "compression/movement_sparsity/model_sparsity": 0.8888359655235915, "compression_loss": 105.51081848144531, "distillation_loss": 4.22309684753418, "epoch": 4.59, "learning_rate": 3.0027237719545416e-05, "loss": 109.5544, "step": 5436, "task_loss": 2.334439516067505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975322290881093, "compression/movement_sparsity/importance_threshold": -1.755349674965618e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205198302723518, "compression/movement_sparsity/model_sparsity": 0.8888971422522678, "compression_loss": 105.51188659667969, "distillation_loss": 4.493262767791748, "epoch": 4.6, "learning_rate": 3.0022541561003102e-05, "loss": 109.5274, "step": 5437, "task_loss": 2.844148874282837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975476205461891, "compression/movement_sparsity/importance_threshold": -1.7444015797404323e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205455983986129, "compression/movement_sparsity/model_sparsity": 0.8889220251641199, "compression_loss": 105.51287841796875, "distillation_loss": 3.6817104816436768, "epoch": 4.6, "learning_rate": 3.001784540246079e-05, "loss": 109.1002, "step": 5438, "task_loss": 2.8216168880462646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975629478732865, "compression/movement_sparsity/importance_threshold": -1.7334991015124056e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205579279879483, "compression/movement_sparsity/model_sparsity": 0.8889339311941314, "compression_loss": 105.51393127441406, "distillation_loss": 2.6702358722686768, "epoch": 4.6, "learning_rate": 3.0013149243918475e-05, "loss": 109.3731, "step": 5439, "task_loss": 1.581262469291687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975782112032866, "compression/movement_sparsity/importance_threshold": -1.7226421450479944e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205316709708142, "compression/movement_sparsity/model_sparsity": 0.8889085761863118, "compression_loss": 105.51493835449219, "distillation_loss": 5.64640474319458, "epoch": 4.6, "learning_rate": 3.0008453085376165e-05, "loss": 110.1727, "step": 5440, "task_loss": 3.5521676540374756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975934106700748, "compression/movement_sparsity/importance_threshold": -1.711830615113135e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205013120400133, "compression/movement_sparsity/model_sparsity": 0.888879260178179, "compression_loss": 105.51594543457031, "distillation_loss": 3.7465834617614746, "epoch": 4.6, "learning_rate": 3.0003756926833854e-05, "loss": 109.4302, "step": 5441, "task_loss": 2.954737663269043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976085464075359, "compression/movement_sparsity/importance_threshold": -1.7010644164741975e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205429631575653, "compression/movement_sparsity/model_sparsity": 0.8889194804517092, "compression_loss": 105.51697540283203, "distillation_loss": 4.890163421630859, "epoch": 4.6, "learning_rate": 2.9999060768291537e-05, "loss": 109.7372, "step": 5442, "task_loss": 2.2979278564453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976236185495553, "compression/movement_sparsity/importance_threshold": -1.6903434538972917e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205744906567946, "compression/movement_sparsity/model_sparsity": 0.88894992488435, "compression_loss": 105.51787567138672, "distillation_loss": 4.7837700843811035, "epoch": 4.6, "learning_rate": 2.9994364609749227e-05, "loss": 109.5526, "step": 5443, "task_loss": 2.193290948867798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976386272300181, "compression/movement_sparsity/importance_threshold": -1.679667632148614e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205496168431062, "compression/movement_sparsity/model_sparsity": 0.8889259055626826, "compression_loss": 105.51888275146484, "distillation_loss": 3.905369520187378, "epoch": 4.6, "learning_rate": 2.9989668451206913e-05, "loss": 109.9637, "step": 5444, "task_loss": 2.1975152492523193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976535725828093, "compression/movement_sparsity/importance_threshold": -1.669036855994361e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206168214519019, "compression/movement_sparsity/model_sparsity": 0.8889908014864206, "compression_loss": 105.51988983154297, "distillation_loss": 4.140334606170654, "epoch": 4.6, "learning_rate": 2.9984972292664603e-05, "loss": 109.7546, "step": 5445, "task_loss": 2.1042191982269287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976684547418141, "compression/movement_sparsity/importance_threshold": -1.658451030200729e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205933189174916, "compression/movement_sparsity/model_sparsity": 0.8889681063363696, "compression_loss": 105.52086639404297, "distillation_loss": 3.6962454319000244, "epoch": 4.6, "learning_rate": 2.9980276134122286e-05, "loss": 109.5569, "step": 5446, "task_loss": 2.869936466217041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976832738409177, "compression/movement_sparsity/importance_threshold": -1.6479100595338282e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920531909454167, "compression/movement_sparsity/model_sparsity": 0.8889088064770277, "compression_loss": 105.5218505859375, "distillation_loss": 3.015488862991333, "epoch": 4.6, "learning_rate": 2.9975579975579976e-05, "loss": 109.4178, "step": 5447, "task_loss": 1.6579548120498657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976980300140053, "compression/movement_sparsity/importance_threshold": -1.6374138487598548e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205546130693456, "compression/movement_sparsity/model_sparsity": 0.8889307301531805, "compression_loss": 105.5228500366211, "distillation_loss": 3.3694350719451904, "epoch": 4.6, "learning_rate": 2.9970883817037666e-05, "loss": 109.102, "step": 5448, "task_loss": 1.852138876914978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977127233949618, "compression/movement_sparsity/importance_threshold": -1.626962302645179e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206405386213297, "compression/movement_sparsity/model_sparsity": 0.889013703898116, "compression_loss": 105.52371215820312, "distillation_loss": 4.0487775802612305, "epoch": 4.61, "learning_rate": 2.9966187658495355e-05, "loss": 109.832, "step": 5449, "task_loss": 2.3124136924743652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977273541176725, "compression/movement_sparsity/importance_threshold": -1.6165553259557368e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205698044589137, "compression/movement_sparsity/model_sparsity": 0.8889453996717828, "compression_loss": 105.5246353149414, "distillation_loss": 4.05060338973999, "epoch": 4.61, "learning_rate": 2.9961491499953038e-05, "loss": 109.6839, "step": 5450, "task_loss": 2.7092514038085938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977419223160227, "compression/movement_sparsity/importance_threshold": -1.6061928234578984e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205917091548608, "compression/movement_sparsity/model_sparsity": 0.8889665518740373, "compression_loss": 105.52550506591797, "distillation_loss": 4.748279571533203, "epoch": 4.61, "learning_rate": 2.9956795341410725e-05, "loss": 109.7232, "step": 5451, "task_loss": 2.5132384300231934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977564281238972, "compression/movement_sparsity/importance_threshold": -1.595874699917687e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206172268736015, "compression/movement_sparsity/model_sparsity": 0.8889911929806377, "compression_loss": 105.52644348144531, "distillation_loss": 3.973663806915283, "epoch": 4.61, "learning_rate": 2.9952099182868414e-05, "loss": 110.2983, "step": 5452, "task_loss": 2.519516944885254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977708716751814, "compression/movement_sparsity/importance_threshold": -1.5856008601013855e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206086891695743, "compression/movement_sparsity/model_sparsity": 0.8889829485730087, "compression_loss": 105.52738952636719, "distillation_loss": 4.2052717208862305, "epoch": 4.61, "learning_rate": 2.9947403024326104e-05, "loss": 110.0564, "step": 5453, "task_loss": 3.084160089492798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977852531037603, "compression/movement_sparsity/importance_threshold": -1.5753712087751906e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205562228319765, "compression/movement_sparsity/model_sparsity": 0.8889322846155128, "compression_loss": 105.52830505371094, "distillation_loss": 3.995290994644165, "epoch": 4.61, "learning_rate": 2.9942706865783794e-05, "loss": 109.8322, "step": 5454, "task_loss": 3.5377087593078613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977995725435191, "compression/movement_sparsity/importance_threshold": -1.565185650705299e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204803135808065, "compression/movement_sparsity/model_sparsity": 0.8888589830806447, "compression_loss": 105.52924346923828, "distillation_loss": 4.8773298263549805, "epoch": 4.61, "learning_rate": 2.9938010707241477e-05, "loss": 110.3627, "step": 5455, "task_loss": 2.6841795444488525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997813830128343, "compression/movement_sparsity/importance_threshold": -1.55504409065782e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204652414329149, "compression/movement_sparsity/model_sparsity": 0.8888444287074004, "compression_loss": 105.53009033203125, "distillation_loss": 3.2470531463623047, "epoch": 4.61, "learning_rate": 2.9933314548699166e-05, "loss": 109.1826, "step": 5456, "task_loss": 1.2016369104385376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978280259921171, "compression/movement_sparsity/importance_threshold": -1.5449464333989506e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204999526849028, "compression/movement_sparsity/model_sparsity": 0.8888779475210984, "compression_loss": 105.53095245361328, "distillation_loss": 2.9993393421173096, "epoch": 4.61, "learning_rate": 2.9928618390156853e-05, "loss": 109.1879, "step": 5457, "task_loss": 1.6323597431182861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978421602687264, "compression/movement_sparsity/importance_threshold": -1.5348925836949738e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204877900339142, "compression/movement_sparsity/model_sparsity": 0.8888662026945879, "compression_loss": 105.53182983398438, "distillation_loss": 4.159697532653809, "epoch": 4.61, "learning_rate": 2.9923922231614543e-05, "loss": 109.3457, "step": 5458, "task_loss": 1.6767222881317139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978562330920562, "compression/movement_sparsity/importance_threshold": -1.5248824463119995e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205507615631993, "compression/movement_sparsity/model_sparsity": 0.8889270109581189, "compression_loss": 105.53268432617188, "distillation_loss": 5.949195861816406, "epoch": 4.61, "learning_rate": 2.9919226073072225e-05, "loss": 110.5842, "step": 5459, "task_loss": 3.9671130180358887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978702445959916, "compression/movement_sparsity/importance_threshold": -1.5149159260161374e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206010815506226, "compression/movement_sparsity/model_sparsity": 0.8889756022991718, "compression_loss": 105.53352355957031, "distillation_loss": 4.503609657287598, "epoch": 4.61, "learning_rate": 2.9914529914529915e-05, "loss": 109.1543, "step": 5460, "task_loss": 1.8863792419433594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978841949144177, "compression/movement_sparsity/importance_threshold": -1.5049929275736708e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207407254778062, "compression/movement_sparsity/model_sparsity": 0.8891104490278617, "compression_loss": 105.5343246459961, "distillation_loss": 2.8260891437530518, "epoch": 4.62, "learning_rate": 2.9909833755987605e-05, "loss": 108.9631, "step": 5461, "task_loss": 1.4789564609527588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978980841812197, "compression/movement_sparsity/importance_threshold": -1.4951133557507962e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207495851343596, "compression/movement_sparsity/model_sparsity": 0.8891190043279571, "compression_loss": 105.53515625, "distillation_loss": 3.887136220932007, "epoch": 4.62, "learning_rate": 2.9905137597445295e-05, "loss": 109.6239, "step": 5462, "task_loss": 2.444453716278076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979119125302828, "compression/movement_sparsity/importance_threshold": -1.4852771153137101e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207030451080769, "compression/movement_sparsity/model_sparsity": 0.889074063094751, "compression_loss": 105.53594970703125, "distillation_loss": 3.7007875442504883, "epoch": 4.62, "learning_rate": 2.9900441438902978e-05, "loss": 108.8644, "step": 5463, "task_loss": 2.0962140560150146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997925680095492, "compression/movement_sparsity/importance_threshold": -1.4754841110285223e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207642041638812, "compression/movement_sparsity/model_sparsity": 0.8891331211488411, "compression_loss": 105.53671264648438, "distillation_loss": 5.225912094116211, "epoch": 4.62, "learning_rate": 2.9895745280360664e-05, "loss": 109.7049, "step": 5464, "task_loss": 3.591418504714966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979393870107324, "compression/movement_sparsity/importance_threshold": -1.465734247661516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207561791990623, "compression/movement_sparsity/model_sparsity": 0.8891253718662514, "compression_loss": 105.53744506835938, "distillation_loss": 4.154509544372559, "epoch": 4.62, "learning_rate": 2.9891049121818354e-05, "loss": 109.8491, "step": 5465, "task_loss": 1.3699941635131836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979530334098894, "compression/movement_sparsity/importance_threshold": -1.4560274299786277e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207391157151753, "compression/movement_sparsity/model_sparsity": 0.8891088945655294, "compression_loss": 105.53824615478516, "distillation_loss": 4.527614116668701, "epoch": 4.62, "learning_rate": 2.9886352963276043e-05, "loss": 109.6676, "step": 5466, "task_loss": 2.1192588806152344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979666194268478, "compression/movement_sparsity/importance_threshold": -1.4463635627464007e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207931441187335, "compression/movement_sparsity/model_sparsity": 0.8891610669272145, "compression_loss": 105.53895568847656, "distillation_loss": 5.291153907775879, "epoch": 4.62, "learning_rate": 2.9881656804733733e-05, "loss": 110.1834, "step": 5467, "task_loss": 3.0343360900878906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997980145195493, "compression/movement_sparsity/importance_threshold": -1.436742550730858e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208345686771005, "compression/movement_sparsity/model_sparsity": 0.8892010684245647, "compression_loss": 105.53972625732422, "distillation_loss": 5.4952921867370605, "epoch": 4.62, "learning_rate": 2.9876960646191416e-05, "loss": 110.0998, "step": 5468, "task_loss": 2.5103302001953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979936108497102, "compression/movement_sparsity/importance_threshold": -1.4271642986980228e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208643075511842, "compression/movement_sparsity/model_sparsity": 0.8892297856768363, "compression_loss": 105.54043579101562, "distillation_loss": 5.163739204406738, "epoch": 4.62, "learning_rate": 2.9872264487649106e-05, "loss": 109.9092, "step": 5469, "task_loss": 2.339810609817505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980070165233843, "compression/movement_sparsity/importance_threshold": -1.4176287114143517e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208509524834321, "compression/movement_sparsity/model_sparsity": 0.8892168893967465, "compression_loss": 105.54125213623047, "distillation_loss": 3.7556233406066895, "epoch": 4.62, "learning_rate": 2.9867568329106792e-05, "loss": 109.6182, "step": 5470, "task_loss": 2.932100296020508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980203623504005, "compression/movement_sparsity/importance_threshold": -1.4081356936458678e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208755281929296, "compression/movement_sparsity/model_sparsity": 0.889240620855019, "compression_loss": 105.54200744628906, "distillation_loss": 4.5820841789245605, "epoch": 4.62, "learning_rate": 2.9862872170564482e-05, "loss": 109.1709, "step": 5471, "task_loss": 2.087576150894165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998033648464644, "compression/movement_sparsity/importance_threshold": -1.3986851501587676e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209107044874554, "compression/movement_sparsity/model_sparsity": 0.8892745887356129, "compression_loss": 105.54273223876953, "distillation_loss": 4.086814880371094, "epoch": 4.63, "learning_rate": 2.9858176012022165e-05, "loss": 109.2385, "step": 5472, "task_loss": 2.6935770511627197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998046875, "compression/movement_sparsity/importance_threshold": -1.3892769857193343e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209107044874554, "compression/movement_sparsity/model_sparsity": 0.8892745887356129, "compression_loss": 105.54347229003906, "distillation_loss": 3.844280242919922, "epoch": 4.63, "learning_rate": 2.9853479853479855e-05, "loss": 109.4622, "step": 5473, "task_loss": 2.672255277633667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980600420903535, "compression/movement_sparsity/importance_threshold": -1.3799111050936778e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208958469745812, "compression/movement_sparsity/model_sparsity": 0.8892602416240128, "compression_loss": 105.54422760009766, "distillation_loss": 4.255912780761719, "epoch": 4.63, "learning_rate": 2.9848783694937544e-05, "loss": 110.0141, "step": 5474, "task_loss": 3.170166492462158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980731498695897, "compression/movement_sparsity/importance_threshold": -1.3705874130479946e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209067098912974, "compression/movement_sparsity/model_sparsity": 0.8892707313661217, "compression_loss": 105.54496002197266, "distillation_loss": 3.182572603225708, "epoch": 4.63, "learning_rate": 2.984408753639523e-05, "loss": 109.1098, "step": 5475, "task_loss": 1.8049278259277344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980861984715937, "compression/movement_sparsity/importance_threshold": -1.3613058143484812e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209598439822828, "compression/movement_sparsity/model_sparsity": 0.8893220401376222, "compression_loss": 105.5456771850586, "distillation_loss": 4.037733554840088, "epoch": 4.63, "learning_rate": 2.9839391377852917e-05, "loss": 109.6202, "step": 5476, "task_loss": 2.377817153930664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980991880302509, "compression/movement_sparsity/importance_threshold": -1.3520662137611605e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209526775575336, "compression/movement_sparsity/model_sparsity": 0.8893151199016097, "compression_loss": 105.5464096069336, "distillation_loss": 3.802335023880005, "epoch": 4.63, "learning_rate": 2.9834695219310603e-05, "loss": 109.7994, "step": 5477, "task_loss": 3.0593879222869873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998112118679446, "compression/movement_sparsity/importance_threshold": -1.3428685160524895e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210065151744096, "compression/movement_sparsity/model_sparsity": 0.889367108030722, "compression_loss": 105.54708099365234, "distillation_loss": 4.458230018615723, "epoch": 4.63, "learning_rate": 2.9829999060768293e-05, "loss": 109.6604, "step": 5478, "task_loss": 2.9682295322418213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981249905530645, "compression/movement_sparsity/importance_threshold": -1.3337126259885777e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209662353361356, "compression/movement_sparsity/model_sparsity": 0.8893282119288081, "compression_loss": 105.54780578613281, "distillation_loss": 4.794845104217529, "epoch": 4.63, "learning_rate": 2.9825302902225983e-05, "loss": 109.6028, "step": 5479, "task_loss": 3.0214762687683105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981378037849913, "compression/movement_sparsity/importance_threshold": -1.3245984483355351e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209787795604886, "compression/movement_sparsity/model_sparsity": 0.8893403252204639, "compression_loss": 105.54849243164062, "distillation_loss": 4.473118305206299, "epoch": 4.63, "learning_rate": 2.9820606743683666e-05, "loss": 109.3144, "step": 5480, "task_loss": 2.1170542240142822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981505585091117, "compression/movement_sparsity/importance_threshold": -1.3155258878594714e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209959503618842, "compression/movement_sparsity/model_sparsity": 0.889356906152008, "compression_loss": 105.54920959472656, "distillation_loss": 4.932757377624512, "epoch": 4.63, "learning_rate": 2.9815910585141355e-05, "loss": 109.9868, "step": 5481, "task_loss": 2.136474132537842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981632548593108, "compression/movement_sparsity/importance_threshold": -1.3064948493267566e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.92104802320195, "compression/movement_sparsity/model_sparsity": 0.8894071901298228, "compression_loss": 105.54984283447266, "distillation_loss": 4.322542190551758, "epoch": 4.63, "learning_rate": 2.9811214426599042e-05, "loss": 109.5154, "step": 5482, "task_loss": 2.818037986755371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981758929694737, "compression/movement_sparsity/importance_threshold": -1.2975052375035004e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210417153172706, "compression/movement_sparsity/model_sparsity": 0.8894010989403875, "compression_loss": 105.5505142211914, "distillation_loss": 4.954678058624268, "epoch": 4.63, "learning_rate": 2.980651826805673e-05, "loss": 109.5209, "step": 5483, "task_loss": 3.3495376110076904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981884729734857, "compression/movement_sparsity/importance_threshold": -1.2885569571558127e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210965426400604, "compression/movement_sparsity/model_sparsity": 0.8894540427759708, "compression_loss": 105.55117797851562, "distillation_loss": 4.660186767578125, "epoch": 4.64, "learning_rate": 2.980182210951442e-05, "loss": 109.7088, "step": 5484, "task_loss": 2.777808427810669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982009950052316, "compression/movement_sparsity/importance_threshold": -1.2796499130500634e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210954575408055, "compression/movement_sparsity/model_sparsity": 0.8894529949532135, "compression_loss": 105.55182647705078, "distillation_loss": 4.943071365356445, "epoch": 4.64, "learning_rate": 2.9797125950972104e-05, "loss": 110.5262, "step": 5485, "task_loss": 2.3689029216766357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982134591985969, "compression/movement_sparsity/importance_threshold": -1.2707840099521889e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210932992664633, "compression/movement_sparsity/model_sparsity": 0.8894509108222347, "compression_loss": 105.55248260498047, "distillation_loss": 4.1200971603393555, "epoch": 4.64, "learning_rate": 2.9792429792429794e-05, "loss": 109.2146, "step": 5486, "task_loss": 3.1247596740722656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982258656874666, "compression/movement_sparsity/importance_threshold": -1.2619591526285591e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211275097034106, "compression/movement_sparsity/model_sparsity": 0.8894839460254292, "compression_loss": 105.5531234741211, "distillation_loss": 6.046497821807861, "epoch": 4.64, "learning_rate": 2.9787733633887484e-05, "loss": 110.015, "step": 5487, "task_loss": 2.9242913722991943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982382146057258, "compression/movement_sparsity/importance_threshold": -1.2531752458452838e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211033155672775, "compression/movement_sparsity/model_sparsity": 0.8894605830323021, "compression_loss": 105.5538330078125, "distillation_loss": 2.6741085052490234, "epoch": 4.64, "learning_rate": 2.978303747534517e-05, "loss": 109.5304, "step": 5488, "task_loss": 1.6398943662643433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982505060872596, "compression/movement_sparsity/importance_threshold": -1.2444321943686464e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211216549371014, "compression/movement_sparsity/model_sparsity": 0.8894782923883541, "compression_loss": 105.55445861816406, "distillation_loss": 3.5110092163085938, "epoch": 4.64, "learning_rate": 2.9778341316802856e-05, "loss": 109.3195, "step": 5489, "task_loss": 1.9002059698104858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982627402659533, "compression/movement_sparsity/importance_threshold": -1.2357299029647564e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210613663455345, "compression/movement_sparsity/model_sparsity": 0.8894200748953769, "compression_loss": 105.55513000488281, "distillation_loss": 5.2430572509765625, "epoch": 4.64, "learning_rate": 2.9773645158260543e-05, "loss": 109.5453, "step": 5490, "task_loss": 3.5156707763671875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998274917275692, "compression/movement_sparsity/importance_threshold": -1.2270682763997239e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210964949433897, "compression/movement_sparsity/model_sparsity": 0.8894539967178277, "compression_loss": 105.55581665039062, "distillation_loss": 3.1506285667419434, "epoch": 4.64, "learning_rate": 2.9768948999718232e-05, "loss": 109.5934, "step": 5491, "task_loss": 2.115549325942993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982870372503607, "compression/movement_sparsity/importance_threshold": -1.2184472194398319e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210867052017607, "compression/movement_sparsity/model_sparsity": 0.8894445432839403, "compression_loss": 105.5564956665039, "distillation_loss": 2.416412353515625, "epoch": 4.64, "learning_rate": 2.9764252841175922e-05, "loss": 109.1581, "step": 5492, "task_loss": 1.2523891925811768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982991003238446, "compression/movement_sparsity/importance_threshold": -1.209866636851277e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211172191467409, "compression/movement_sparsity/model_sparsity": 0.8894740089810386, "compression_loss": 105.55718994140625, "distillation_loss": 5.537360191345215, "epoch": 4.64, "learning_rate": 2.9759556682633605e-05, "loss": 110.0461, "step": 5493, "task_loss": 2.7346689701080322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998311106630029, "compression/movement_sparsity/importance_threshold": -1.201326433400169e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211448116706503, "compression/movement_sparsity/model_sparsity": 0.8895006536168671, "compression_loss": 105.55782318115234, "distillation_loss": 3.9417214393615723, "epoch": 4.64, "learning_rate": 2.9754860524091295e-05, "loss": 109.8728, "step": 5494, "task_loss": 3.5602359771728516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983230563027988, "compression/movement_sparsity/importance_threshold": -1.1928265138527912e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211470414899982, "compression/movement_sparsity/model_sparsity": 0.8895028068350607, "compression_loss": 105.55854034423828, "distillation_loss": 4.076042652130127, "epoch": 4.64, "learning_rate": 2.975016436554898e-05, "loss": 109.6105, "step": 5495, "task_loss": 1.3658392429351807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983349494760394, "compression/movement_sparsity/importance_threshold": -1.1843667829751665e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211825158887149, "compression/movement_sparsity/model_sparsity": 0.8895370625790494, "compression_loss": 105.55917358398438, "distillation_loss": 3.8640711307525635, "epoch": 4.65, "learning_rate": 2.974546820700667e-05, "loss": 109.8621, "step": 5496, "task_loss": 2.540809154510498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983467862836357, "compression/movement_sparsity/importance_threshold": -1.175947145533665e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211943446630096, "compression/movement_sparsity/model_sparsity": 0.8895484849985577, "compression_loss": 105.55974578857422, "distillation_loss": 5.186282157897949, "epoch": 4.65, "learning_rate": 2.9740772048464354e-05, "loss": 109.5971, "step": 5497, "task_loss": 3.628201961517334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998358566859473, "compression/movement_sparsity/importance_threshold": -1.1675675062943966e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211756833406596, "compression/movement_sparsity/model_sparsity": 0.8895304647500392, "compression_loss": 105.56038665771484, "distillation_loss": 3.627009868621826, "epoch": 4.65, "learning_rate": 2.9736075889922044e-05, "loss": 109.1779, "step": 5498, "task_loss": 2.2474334239959717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983702913374364, "compression/movement_sparsity/importance_threshold": -1.1592277700234709e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211878936883187, "compression/movement_sparsity/model_sparsity": 0.8895422556346928, "compression_loss": 105.56096649169922, "distillation_loss": 5.931939125061035, "epoch": 4.65, "learning_rate": 2.9731379731379733e-05, "loss": 110.2137, "step": 5499, "task_loss": 2.7637648582458496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983819598514109, "compression/movement_sparsity/importance_threshold": -1.1509278414872579e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211399227619196, "compression/movement_sparsity/model_sparsity": 0.8894959326571914, "compression_loss": 105.56155395507812, "distillation_loss": 4.957468032836914, "epoch": 4.65, "learning_rate": 2.9726683572837423e-05, "loss": 110.1265, "step": 5500, "task_loss": 2.9151554107666016 }, { "epoch": 4.65, "eval_accuracy": 0.5635643564356435, "eval_loss": 109.36628723144531, "eval_runtime": 227.9467, "eval_samples_per_second": 110.772, "eval_steps_per_second": 0.869, "step": 5500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983935725352818, "compression/movement_sparsity/importance_threshold": -1.1426676254516939e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210971150001068, "compression/movement_sparsity/model_sparsity": 0.889454595473689, "compression_loss": 105.56216430664062, "distillation_loss": 5.884405136108398, "epoch": 4.65, "learning_rate": 2.972198741429511e-05, "loss": 110.3774, "step": 5501, "task_loss": 2.9804883003234863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984051295229343, "compression/movement_sparsity/importance_threshold": -1.1344470266832357e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210954217683025, "compression/movement_sparsity/model_sparsity": 0.8894529604096061, "compression_loss": 105.56278228759766, "distillation_loss": 5.089557647705078, "epoch": 4.65, "learning_rate": 2.9717291255752792e-05, "loss": 109.6078, "step": 5502, "task_loss": 2.93888783454895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984166309482533, "compression/movement_sparsity/importance_threshold": -1.1262659499479064e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210610801655112, "compression/movement_sparsity/model_sparsity": 0.8894197985465179, "compression_loss": 105.56333923339844, "distillation_loss": 4.86373233795166, "epoch": 4.65, "learning_rate": 2.9712595097210482e-05, "loss": 109.765, "step": 5503, "task_loss": 3.11025333404541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984280769451241, "compression/movement_sparsity/importance_threshold": -1.1181243000118156e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210843919132393, "compression/movement_sparsity/model_sparsity": 0.8894423094639962, "compression_loss": 105.56399536132812, "distillation_loss": 4.782934188842773, "epoch": 4.65, "learning_rate": 2.9707898938668172e-05, "loss": 110.1018, "step": 5504, "task_loss": 2.6604340076446533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998439467647432, "compression/movement_sparsity/importance_threshold": -1.1100219816413334e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211828378412411, "compression/movement_sparsity/model_sparsity": 0.8895373734715158, "compression_loss": 105.56453704833984, "distillation_loss": 4.015327453613281, "epoch": 4.65, "learning_rate": 2.970320278012586e-05, "loss": 109.6904, "step": 5505, "task_loss": 2.6046464443206787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984508031890618, "compression/movement_sparsity/importance_threshold": -1.1019588996025696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211087887602224, "compression/movement_sparsity/model_sparsity": 0.8894658682042318, "compression_loss": 105.56514739990234, "distillation_loss": 4.290561676025391, "epoch": 4.65, "learning_rate": 2.9698506621583544e-05, "loss": 109.2332, "step": 5506, "task_loss": 2.6484534740448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984620837038989, "compression/movement_sparsity/importance_threshold": -1.093934958661634e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211453601823616, "compression/movement_sparsity/model_sparsity": 0.8895011832855136, "compression_loss": 105.56571197509766, "distillation_loss": 3.42913556098938, "epoch": 4.65, "learning_rate": 2.9693810463041234e-05, "loss": 109.4469, "step": 5507, "task_loss": 2.467008352279663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984733093258282, "compression/movement_sparsity/importance_threshold": -1.0859500635848965e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211684930675751, "compression/movement_sparsity/model_sparsity": 0.8895235214849551, "compression_loss": 105.56631469726562, "distillation_loss": 4.897744178771973, "epoch": 4.66, "learning_rate": 2.968911430449892e-05, "loss": 109.8016, "step": 5508, "task_loss": 2.6290814876556396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984844801887351, "compression/movement_sparsity/importance_threshold": -1.0780041191382934e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212020476753024, "compression/movement_sparsity/model_sparsity": 0.889555923388681, "compression_loss": 105.56684112548828, "distillation_loss": 4.00047492980957, "epoch": 4.66, "learning_rate": 2.968441814595661e-05, "loss": 109.5868, "step": 5509, "task_loss": 2.6256988048553467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984955964265047, "compression/movement_sparsity/importance_threshold": -1.0700970300881948e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212786127556923, "compression/movement_sparsity/model_sparsity": 0.8896298582230177, "compression_loss": 105.56742858886719, "distillation_loss": 3.4887101650238037, "epoch": 4.66, "learning_rate": 2.9679721987414293e-05, "loss": 109.1773, "step": 5510, "task_loss": 2.2790486812591553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985066581730219, "compression/movement_sparsity/importance_threshold": -1.0622287012007972e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.921300004712431, "compression/movement_sparsity/model_sparsity": 0.8896505153002331, "compression_loss": 105.5679702758789, "distillation_loss": 3.6663570404052734, "epoch": 4.66, "learning_rate": 2.9675025828871983e-05, "loss": 109.4674, "step": 5511, "task_loss": 2.153451919555664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985176655621721, "compression/movement_sparsity/importance_threshold": -1.0543990372421236e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212940664769483, "compression/movement_sparsity/model_sparsity": 0.8896447810614074, "compression_loss": 105.56846618652344, "distillation_loss": 3.81044340133667, "epoch": 4.66, "learning_rate": 2.9670329670329673e-05, "loss": 110.1085, "step": 5512, "task_loss": 1.1724814176559448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985286187278404, "compression/movement_sparsity/importance_threshold": -1.0466079429784572e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213364091962234, "compression/movement_sparsity/model_sparsity": 0.8896856691780138, "compression_loss": 105.56900787353516, "distillation_loss": 6.264140605926514, "epoch": 4.66, "learning_rate": 2.9665633511787362e-05, "loss": 110.0321, "step": 5513, "task_loss": 4.078514575958252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985395178039117, "compression/movement_sparsity/importance_threshold": -1.0388553231760814e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213215755316844, "compression/movement_sparsity/model_sparsity": 0.8896713450954854, "compression_loss": 105.56951904296875, "distillation_loss": 4.771422863006592, "epoch": 4.66, "learning_rate": 2.966093735324505e-05, "loss": 110.2464, "step": 5514, "task_loss": 1.4382330179214478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985503629242715, "compression/movement_sparsity/importance_threshold": -1.031141082601019e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211980411549768, "compression/movement_sparsity/model_sparsity": 0.8895520545046539, "compression_loss": 105.57005310058594, "distillation_loss": 3.3453893661499023, "epoch": 4.66, "learning_rate": 2.965624119470273e-05, "loss": 110.0421, "step": 5515, "task_loss": 1.863745927810669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985611542228048, "compression/movement_sparsity/importance_threshold": -1.0234651260194669e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212161897381186, "compression/movement_sparsity/model_sparsity": 0.8895695796281332, "compression_loss": 105.57064819335938, "distillation_loss": 3.316615343093872, "epoch": 4.66, "learning_rate": 2.965154503616042e-05, "loss": 109.3595, "step": 5516, "task_loss": 2.2219417095184326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985718918333966, "compression/movement_sparsity/importance_threshold": -1.0158273581977947e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212609769117589, "compression/movement_sparsity/model_sparsity": 0.8896128282245775, "compression_loss": 105.57109832763672, "distillation_loss": 3.952594518661499, "epoch": 4.66, "learning_rate": 2.964684887761811e-05, "loss": 109.0928, "step": 5517, "task_loss": 2.7038230895996094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985825758899322, "compression/movement_sparsity/importance_threshold": -1.008227683901939e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213202042524062, "compression/movement_sparsity/model_sparsity": 0.889670020923869, "compression_loss": 105.57166290283203, "distillation_loss": 3.7496278285980225, "epoch": 4.66, "learning_rate": 2.96421527190758e-05, "loss": 109.3641, "step": 5518, "task_loss": 3.0683722496032715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985932065262967, "compression/movement_sparsity/importance_threshold": -1.0006660078981829e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213038681427451, "compression/movement_sparsity/model_sparsity": 0.8896542460098305, "compression_loss": 105.57217407226562, "distillation_loss": 3.7039690017700195, "epoch": 4.66, "learning_rate": 2.9637456560533484e-05, "loss": 109.7924, "step": 5519, "task_loss": 2.0124638080596924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986037838763752, "compression/movement_sparsity/importance_threshold": -9.931422349528096e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9213021629867731, "compression/movement_sparsity/model_sparsity": 0.8896525994312119, "compression_loss": 105.5726547241211, "distillation_loss": 4.012373924255371, "epoch": 4.67, "learning_rate": 2.9632760401991173e-05, "loss": 109.7029, "step": 5520, "task_loss": 1.0864660739898682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986143080740528, "compression/movement_sparsity/importance_threshold": -9.85656269831929e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9212845867636779, "compression/movement_sparsity/model_sparsity": 0.8896356270054507, "compression_loss": 105.57318115234375, "distillation_loss": 5.847141742706299, "epoch": 4.67, "learning_rate": 2.962806424344886e-05, "loss": 110.2989, "step": 5521, "task_loss": 1.9670398235321045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986247792532149, "compression/movement_sparsity/importance_threshold": -9.78208017301651e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9213615095690968, "compression/movement_sparsity/model_sparsity": 0.8897099072758613, "compression_loss": 105.57363891601562, "distillation_loss": 4.05030632019043, "epoch": 4.67, "learning_rate": 2.962336808490655e-05, "loss": 110.1203, "step": 5522, "task_loss": 2.3449554443359375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986351975477463, "compression/movement_sparsity/importance_threshold": -9.707973821283453e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9214185547870668, "compression/movement_sparsity/model_sparsity": 0.8897649928151024, "compression_loss": 105.57414245605469, "distillation_loss": 4.418947219848633, "epoch": 4.67, "learning_rate": 2.9618671926364233e-05, "loss": 109.2299, "step": 5523, "task_loss": 2.386338710784912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986455630915324, "compression/movement_sparsity/importance_threshold": -9.634242690779483e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9214462427043173, "compression/movement_sparsity/model_sparsity": 0.8897917295672173, "compression_loss": 105.57463073730469, "distillation_loss": 4.986706733703613, "epoch": 4.67, "learning_rate": 2.9613975767821922e-05, "loss": 110.1645, "step": 5524, "task_loss": 2.391794443130493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986558760184582, "compression/movement_sparsity/importance_threshold": -9.560885829168302e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215499233419111, "compression/movement_sparsity/model_sparsity": 0.8898918484559508, "compression_loss": 105.57508087158203, "distillation_loss": 4.082300662994385, "epoch": 4.67, "learning_rate": 2.9609279609279612e-05, "loss": 109.622, "step": 5525, "task_loss": 2.6163346767425537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986661364624089, "compression/movement_sparsity/importance_threshold": -9.487902284111872e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215571613116661, "compression/movement_sparsity/model_sparsity": 0.8898988377791781, "compression_loss": 105.5755386352539, "distillation_loss": 2.560070037841797, "epoch": 4.67, "learning_rate": 2.9604583450737298e-05, "loss": 108.9346, "step": 5526, "task_loss": 1.4605498313903809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986763445572695, "compression/movement_sparsity/importance_threshold": -9.415291103271294e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215297595744388, "compression/movement_sparsity/model_sparsity": 0.8898723773759223, "compression_loss": 105.57595825195312, "distillation_loss": 4.479663848876953, "epoch": 4.67, "learning_rate": 2.9599887292194985e-05, "loss": 109.704, "step": 5527, "task_loss": 2.8612828254699707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986865004369254, "compression/movement_sparsity/importance_threshold": -9.343051334307663e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215449867365099, "compression/movement_sparsity/model_sparsity": 0.8898870814381319, "compression_loss": 105.57642364501953, "distillation_loss": 3.736069679260254, "epoch": 4.67, "learning_rate": 2.959519113365267e-05, "loss": 109.4333, "step": 5528, "task_loss": 2.797093391418457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986966042352615, "compression/movement_sparsity/importance_threshold": -9.27118202488468e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216027951012086, "compression/movement_sparsity/model_sparsity": 0.8899429039076638, "compression_loss": 105.57685852050781, "distillation_loss": 4.133054256439209, "epoch": 4.67, "learning_rate": 2.959049497511036e-05, "loss": 109.1225, "step": 5529, "task_loss": 1.9220129251480103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987066560861632, "compression/movement_sparsity/importance_threshold": -9.19968222266171e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216613546884682, "compression/movement_sparsity/model_sparsity": 0.8899994517929508, "compression_loss": 105.57726287841797, "distillation_loss": 3.5606894493103027, "epoch": 4.67, "learning_rate": 2.958579881656805e-05, "loss": 109.4343, "step": 5530, "task_loss": 1.5954753160476685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987166561235153, "compression/movement_sparsity/importance_threshold": -9.128550975303316e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216517080368508, "compression/movement_sparsity/model_sparsity": 0.889990136533493, "compression_loss": 105.57769775390625, "distillation_loss": 4.8129563331604, "epoch": 4.67, "learning_rate": 2.958110265802574e-05, "loss": 109.9168, "step": 5531, "task_loss": 2.671309471130371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987266044812032, "compression/movement_sparsity/importance_threshold": -9.0577873304706e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216724560885372, "compression/movement_sparsity/model_sparsity": 0.8900101718257756, "compression_loss": 105.57804107666016, "distillation_loss": 4.292448043823242, "epoch": 4.68, "learning_rate": 2.9576406499483423e-05, "loss": 110.0562, "step": 5532, "task_loss": 3.2544970512390137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998736501293112, "compression/movement_sparsity/importance_threshold": -8.98739033582379e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92170583183375, "compression/movement_sparsity/model_sparsity": 0.8900424010114645, "compression_loss": 105.57850646972656, "distillation_loss": 5.27227783203125, "epoch": 4.68, "learning_rate": 2.957171034094111e-05, "loss": 110.023, "step": 5533, "task_loss": 2.9766767024993896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987463466931267, "compression/movement_sparsity/importance_threshold": -8.917359039027455e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216602695892134, "compression/movement_sparsity/model_sparsity": 0.8899984039701935, "compression_loss": 105.57890319824219, "distillation_loss": 4.1036834716796875, "epoch": 4.68, "learning_rate": 2.95670141823988e-05, "loss": 109.1726, "step": 5534, "task_loss": 2.542269229888916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987561408151326, "compression/movement_sparsity/importance_threshold": -8.847692487740956e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216946469645075, "compression/movement_sparsity/model_sparsity": 0.8900316003768892, "compression_loss": 105.57933044433594, "distillation_loss": 3.677091598510742, "epoch": 4.68, "learning_rate": 2.956231802385649e-05, "loss": 109.8788, "step": 5535, "task_loss": 2.3868680000305176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987658837930148, "compression/movement_sparsity/importance_threshold": -8.778389729627127e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217733106984014, "compression/movement_sparsity/model_sparsity": 0.8901075617695258, "compression_loss": 105.57972717285156, "distillation_loss": 3.804729700088501, "epoch": 4.68, "learning_rate": 2.9557621865314172e-05, "loss": 109.2046, "step": 5536, "task_loss": 2.790321111679077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987755757606583, "compression/movement_sparsity/importance_threshold": -8.709449812347933e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217945237926256, "compression/movement_sparsity/model_sparsity": 0.8901280461287042, "compression_loss": 105.58016967773438, "distillation_loss": 4.175415992736816, "epoch": 4.68, "learning_rate": 2.955292570677186e-05, "loss": 108.9459, "step": 5537, "task_loss": 2.186868906021118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987852168519485, "compression/movement_sparsity/importance_threshold": -8.640871783565339e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9218573522318989, "compression/movement_sparsity/model_sparsity": 0.8901887162178057, "compression_loss": 105.58059692382812, "distillation_loss": 3.1539692878723145, "epoch": 4.68, "learning_rate": 2.954822954822955e-05, "loss": 109.6666, "step": 5538, "task_loss": 1.6384248733520508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987948072007704, "compression/movement_sparsity/importance_threshold": -8.572654690940443e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219303162136627, "compression/movement_sparsity/model_sparsity": 0.8902591736623325, "compression_loss": 105.5810317993164, "distillation_loss": 4.4168477058410645, "epoch": 4.68, "learning_rate": 2.9543533389687238e-05, "loss": 109.6954, "step": 5539, "task_loss": 2.3842477798461914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988043469410092, "compression/movement_sparsity/importance_threshold": -8.504797582136077e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219531748430208, "compression/movement_sparsity/model_sparsity": 0.8902812470274506, "compression_loss": 105.58143615722656, "distillation_loss": 3.4539570808410645, "epoch": 4.68, "learning_rate": 2.9538837231144924e-05, "loss": 109.3547, "step": 5540, "task_loss": 2.4469308853149414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988138362065498, "compression/movement_sparsity/importance_threshold": -8.43729950481334e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219792529976404, "compression/movement_sparsity/model_sparsity": 0.8903064293172332, "compression_loss": 105.58184814453125, "distillation_loss": 4.28734016418457, "epoch": 4.68, "learning_rate": 2.953414107260261e-05, "loss": 109.4174, "step": 5541, "task_loss": 2.1914634704589844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988232751312777, "compression/movement_sparsity/importance_threshold": -8.370159506634195e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219975208224584, "compression/movement_sparsity/model_sparsity": 0.8903240695860705, "compression_loss": 105.58222961425781, "distillation_loss": 3.893965244293213, "epoch": 4.68, "learning_rate": 2.95294449140603e-05, "loss": 109.9103, "step": 5542, "task_loss": 1.91208815574646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988326638490778, "compression/movement_sparsity/importance_threshold": -8.303376635261477e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9220629248819412, "compression/movement_sparsity/model_sparsity": 0.8903872268149036, "compression_loss": 105.5826416015625, "distillation_loss": 4.336143493652344, "epoch": 4.69, "learning_rate": 2.952474875551799e-05, "loss": 109.7888, "step": 5543, "task_loss": 2.3761515617370605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988420024938354, "compression/movement_sparsity/importance_threshold": -8.236949938354547e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9220767092197283, "compression/movement_sparsity/model_sparsity": 0.8904005376182821, "compression_loss": 105.58305358886719, "distillation_loss": 3.38543438911438, "epoch": 4.69, "learning_rate": 2.952005259697568e-05, "loss": 109.2673, "step": 5544, "task_loss": 1.9144207239151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988512911994354, "compression/movement_sparsity/importance_threshold": -8.170878463578841e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221160112762561, "compression/movement_sparsity/model_sparsity": 0.8904384895282609, "compression_loss": 105.5834732055664, "distillation_loss": 3.4697425365448, "epoch": 4.69, "learning_rate": 2.9515356438433362e-05, "loss": 109.7147, "step": 5545, "task_loss": 1.526671051979065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988605300997632, "compression/movement_sparsity/importance_threshold": -8.105161258592855e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922183108567543, "compression/movement_sparsity/model_sparsity": 0.8905032818211768, "compression_loss": 105.5838851928711, "distillation_loss": 4.158267974853516, "epoch": 4.69, "learning_rate": 2.951066027989105e-05, "loss": 109.6983, "step": 5546, "task_loss": 2.7130162715911865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988697193287037, "compression/movement_sparsity/importance_threshold": -8.039797371061155e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221972864028621, "compression/movement_sparsity/model_sparsity": 0.8905169726042365, "compression_loss": 105.58427429199219, "distillation_loss": 4.23112678527832, "epoch": 4.69, "learning_rate": 2.950596412134874e-05, "loss": 109.4556, "step": 5547, "task_loss": 2.495347738265991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988788590201423, "compression/movement_sparsity/importance_threshold": -7.974785848643105e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221633502217704, "compression/movement_sparsity/model_sparsity": 0.8904842022353652, "compression_loss": 105.58464050292969, "distillation_loss": 4.463796138763428, "epoch": 4.69, "learning_rate": 2.9501267962806428e-05, "loss": 109.5446, "step": 5548, "task_loss": 2.343773126602173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988879493079639, "compression/movement_sparsity/importance_threshold": -7.910125739003272e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221591648389302, "compression/movement_sparsity/model_sparsity": 0.8904801606333013, "compression_loss": 105.58502960205078, "distillation_loss": 3.335355758666992, "epoch": 4.69, "learning_rate": 2.949657180426411e-05, "loss": 109.7364, "step": 5549, "task_loss": 1.6353775262832642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988969903260538, "compression/movement_sparsity/importance_threshold": -7.84581608980102e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221580797396753, "compression/movement_sparsity/model_sparsity": 0.890479112810544, "compression_loss": 105.58538055419922, "distillation_loss": 4.549067497253418, "epoch": 4.69, "learning_rate": 2.94918756457218e-05, "loss": 110.0124, "step": 5550, "task_loss": 2.2798023223876953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989059822082972, "compression/movement_sparsity/importance_threshold": -7.781855948698312e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221578770288256, "compression/movement_sparsity/model_sparsity": 0.8904789170634355, "compression_loss": 105.58570861816406, "distillation_loss": 3.7638230323791504, "epoch": 4.69, "learning_rate": 2.948717948717949e-05, "loss": 109.3347, "step": 5551, "task_loss": 2.255821943283081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989149250885789, "compression/movement_sparsity/importance_threshold": -7.718244363359716e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221695507889411, "compression/movement_sparsity/model_sparsity": 0.8904901897939783, "compression_loss": 105.58606719970703, "distillation_loss": 3.59747314453125, "epoch": 4.69, "learning_rate": 2.9482483328637177e-05, "loss": 109.0328, "step": 5552, "task_loss": 2.5557188987731934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989238191007845, "compression/movement_sparsity/importance_threshold": -7.65498038144373e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221723410441679, "compression/movement_sparsity/model_sparsity": 0.8904928841953543, "compression_loss": 105.58641815185547, "distillation_loss": 3.746750831604004, "epoch": 4.69, "learning_rate": 2.947778717009486e-05, "loss": 109.5031, "step": 5553, "task_loss": 2.1609978675842285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989326643787988, "compression/movement_sparsity/importance_threshold": -7.592063050614918e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221803540848191, "compression/movement_sparsity/model_sparsity": 0.8905006219634082, "compression_loss": 105.58673858642578, "distillation_loss": 3.2409987449645996, "epoch": 4.69, "learning_rate": 2.947309101155255e-05, "loss": 109.6684, "step": 5554, "task_loss": 1.7385210990905762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989414610565072, "compression/movement_sparsity/importance_threshold": -7.529491418533513e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222105699256085, "compression/movement_sparsity/model_sparsity": 0.8905297997971116, "compression_loss": 105.5870361328125, "distillation_loss": 4.754494667053223, "epoch": 4.7, "learning_rate": 2.946839485301024e-05, "loss": 110.6516, "step": 5555, "task_loss": 2.34639310836792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989502092677945, "compression/movement_sparsity/importance_threshold": -7.467264532861478e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222870038401544, "compression/movement_sparsity/model_sparsity": 0.8906036079715546, "compression_loss": 105.58734130859375, "distillation_loss": 3.0920376777648926, "epoch": 4.7, "learning_rate": 2.946369869446793e-05, "loss": 109.6701, "step": 5556, "task_loss": 1.7658922672271729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989589091465461, "compression/movement_sparsity/importance_threshold": -7.405381441261648e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223078115126789, "compression/movement_sparsity/model_sparsity": 0.8906237008365161, "compression_loss": 105.58771514892578, "distillation_loss": 6.285530090332031, "epoch": 4.7, "learning_rate": 2.9459002535925612e-05, "loss": 110.0462, "step": 5557, "task_loss": 4.091370105743408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989675608266471, "compression/movement_sparsity/importance_threshold": -7.343841191395119e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92229408679573, "compression/movement_sparsity/model_sparsity": 0.8906104476058165, "compression_loss": 105.58804321289062, "distillation_loss": 5.085701942443848, "epoch": 4.7, "learning_rate": 2.9454306377383302e-05, "loss": 109.8148, "step": 5558, "task_loss": 2.441516876220703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989761644419826, "compression/movement_sparsity/importance_threshold": -7.282642830923856e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222777029893984, "compression/movement_sparsity/model_sparsity": 0.8905946266336349, "compression_loss": 105.58831787109375, "distillation_loss": 4.865406513214111, "epoch": 4.7, "learning_rate": 2.9449610218840988e-05, "loss": 109.6191, "step": 5559, "task_loss": 2.333951234817505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989847201264378, "compression/movement_sparsity/importance_threshold": -7.221785407508091e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222855848642056, "compression/movement_sparsity/model_sparsity": 0.890602237741795, "compression_loss": 105.58865356445312, "distillation_loss": 4.443185329437256, "epoch": 4.7, "learning_rate": 2.9444914060298678e-05, "loss": 109.8672, "step": 5560, "task_loss": 3.716402530670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989932280138978, "compression/movement_sparsity/importance_threshold": -7.161267968813258e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922271883995592, "compression/movement_sparsity/model_sparsity": 0.8905890075401671, "compression_loss": 105.58895111083984, "distillation_loss": 4.058321952819824, "epoch": 4.7, "learning_rate": 2.9440217901756368e-05, "loss": 109.5738, "step": 5561, "task_loss": 2.0400383472442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990016882382476, "compression/movement_sparsity/importance_threshold": -7.101089562498719e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223113649146344, "compression/movement_sparsity/model_sparsity": 0.8906271321681828, "compression_loss": 105.58930206298828, "distillation_loss": 3.583376884460449, "epoch": 4.7, "learning_rate": 2.943552174321405e-05, "loss": 109.4661, "step": 5562, "task_loss": 2.6728858947753906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990101009333726, "compression/movement_sparsity/importance_threshold": -7.041249236226441e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223300739336551, "compression/movement_sparsity/model_sparsity": 0.8906451984748445, "compression_loss": 105.58955383300781, "distillation_loss": 3.8355822563171387, "epoch": 4.7, "learning_rate": 2.943082558467174e-05, "loss": 109.9862, "step": 5563, "task_loss": 2.320627450942993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990184662331578, "compression/movement_sparsity/importance_threshold": -6.981746037659255e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922294873790794, "compression/movement_sparsity/model_sparsity": 0.890611207565179, "compression_loss": 105.58990478515625, "distillation_loss": 4.566092491149902, "epoch": 4.7, "learning_rate": 2.942612942612943e-05, "loss": 110.0726, "step": 5564, "task_loss": 2.442600727081299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990267842714884, "compression/movement_sparsity/importance_threshold": -6.922579014457392e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92234235582632, "compression/movement_sparsity/model_sparsity": 0.8906570584467128, "compression_loss": 105.59020233154297, "distillation_loss": 4.087808609008789, "epoch": 4.7, "learning_rate": 2.9421433267587116e-05, "loss": 110.0832, "step": 5565, "task_loss": 2.2976455688476562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990350551822496, "compression/movement_sparsity/importance_threshold": -6.8637472142836845e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223840307922073, "compression/movement_sparsity/model_sparsity": 0.8906973017493147, "compression_loss": 105.59040069580078, "distillation_loss": 4.557065963745117, "epoch": 4.7, "learning_rate": 2.94167371090448e-05, "loss": 110.241, "step": 5566, "task_loss": 2.637129545211792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990432790993262, "compression/movement_sparsity/importance_threshold": -6.805249684800965e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223980416891795, "compression/movement_sparsity/model_sparsity": 0.8907108313288733, "compression_loss": 105.5906982421875, "distillation_loss": 3.960838556289673, "epoch": 4.71, "learning_rate": 2.941204095050249e-05, "loss": 109.6045, "step": 5567, "task_loss": 1.5041446685791016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990514561566037, "compression/movement_sparsity/importance_threshold": -6.747085473670332e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224417437635649, "compression/movement_sparsity/model_sparsity": 0.8907530321025603, "compression_loss": 105.59091186523438, "distillation_loss": 3.085381269454956, "epoch": 4.71, "learning_rate": 2.940734479196018e-05, "loss": 109.44, "step": 5568, "task_loss": 2.2839043140411377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990595864879671, "compression/movement_sparsity/importance_threshold": -6.6892536285528825e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224326336994911, "compression/movement_sparsity/model_sparsity": 0.8907442349972133, "compression_loss": 105.59114837646484, "distillation_loss": 3.8986258506774902, "epoch": 4.71, "learning_rate": 2.940264863341787e-05, "loss": 109.5924, "step": 5569, "task_loss": 2.2952423095703125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990676702273016, "compression/movement_sparsity/importance_threshold": -6.631753197111449e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224525589836107, "compression/movement_sparsity/model_sparsity": 0.890763475786526, "compression_loss": 105.59136199951172, "distillation_loss": 6.003969192504883, "epoch": 4.71, "learning_rate": 2.939795247487555e-05, "loss": 110.5314, "step": 5570, "task_loss": 3.706850528717041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990757075084923, "compression/movement_sparsity/importance_threshold": -6.57458322700713e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224588191716195, "compression/movement_sparsity/model_sparsity": 0.8907695209178181, "compression_loss": 105.59156799316406, "distillation_loss": 4.250543594360352, "epoch": 4.71, "learning_rate": 2.939325631633324e-05, "loss": 109.8227, "step": 5571, "task_loss": 2.4149715900421143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990836984654243, "compression/movement_sparsity/importance_threshold": -6.5177427659018905e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224967380247016, "compression/movement_sparsity/model_sparsity": 0.8908061371416448, "compression_loss": 105.59182739257812, "distillation_loss": 5.130594253540039, "epoch": 4.71, "learning_rate": 2.9388560157790928e-05, "loss": 109.7389, "step": 5572, "task_loss": 3.116185426712036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990916432319829, "compression/movement_sparsity/importance_threshold": -6.4612308614585626e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224864355438641, "compression/movement_sparsity/model_sparsity": 0.8907961885827183, "compression_loss": 105.592041015625, "distillation_loss": 5.393634796142578, "epoch": 4.71, "learning_rate": 2.9383863999248617e-05, "loss": 110.0018, "step": 5573, "task_loss": 3.378681182861328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990995419420531, "compression/movement_sparsity/importance_threshold": -6.405046561337377e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225333094468408, "compression/movement_sparsity/model_sparsity": 0.8908414522229267, "compression_loss": 105.59223175048828, "distillation_loss": 4.607988357543945, "epoch": 4.71, "learning_rate": 2.93791678407063e-05, "loss": 110.1617, "step": 5574, "task_loss": 2.226433277130127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991073947295199, "compression/movement_sparsity/importance_threshold": -6.3491889132020335e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922568044547164, "compression/movement_sparsity/model_sparsity": 0.8908749940656961, "compression_loss": 105.5924301147461, "distillation_loss": 3.2016005516052246, "epoch": 4.71, "learning_rate": 2.937447168216399e-05, "loss": 108.5848, "step": 5575, "task_loss": 1.7235091924667358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991152017282688, "compression/movement_sparsity/importance_threshold": -6.293656964711895e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225539263326832, "compression/movement_sparsity/model_sparsity": 0.8908613608553154, "compression_loss": 105.59264373779297, "distillation_loss": 4.139706134796143, "epoch": 4.71, "learning_rate": 2.936977552362168e-05, "loss": 109.7354, "step": 5576, "task_loss": 2.0345497131347656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991229630721846, "compression/movement_sparsity/importance_threshold": -6.238449763532397e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225720749158249, "compression/movement_sparsity/model_sparsity": 0.8908788859787947, "compression_loss": 105.59278869628906, "distillation_loss": 4.037724018096924, "epoch": 4.71, "learning_rate": 2.9365079365079366e-05, "loss": 109.6336, "step": 5577, "task_loss": 1.9288111925125122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991306788951527, "compression/movement_sparsity/importance_threshold": -6.1835663573220345e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226272957361465, "compression/movement_sparsity/model_sparsity": 0.8909322097940592, "compression_loss": 105.59294128417969, "distillation_loss": 4.039983749389648, "epoch": 4.71, "learning_rate": 2.9360383206537056e-05, "loss": 109.2698, "step": 5578, "task_loss": 2.2476966381073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991383493310582, "compression/movement_sparsity/importance_threshold": -6.12900579374364e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226805848413113, "compression/movement_sparsity/model_sparsity": 0.890983668254525, "compression_loss": 105.59310913085938, "distillation_loss": 4.118457794189453, "epoch": 4.72, "learning_rate": 2.935568704799474e-05, "loss": 109.0787, "step": 5579, "task_loss": 2.987222671508789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991459745137861, "compression/movement_sparsity/importance_threshold": -6.0747671204600465e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227054348066644, "compression/movement_sparsity/model_sparsity": 0.8910076645471208, "compression_loss": 105.59330749511719, "distillation_loss": 4.244807243347168, "epoch": 4.72, "learning_rate": 2.935099088945243e-05, "loss": 109.766, "step": 5580, "task_loss": 2.5980708599090576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991535545772214, "compression/movement_sparsity/importance_threshold": -6.020849385132351e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226853664325333, "compression/movement_sparsity/model_sparsity": 0.8909882855833786, "compression_loss": 105.59346771240234, "distillation_loss": 4.611055850982666, "epoch": 4.72, "learning_rate": 2.9346294730910118e-05, "loss": 109.6902, "step": 5581, "task_loss": 2.6093008518218994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991610896552497, "compression/movement_sparsity/importance_threshold": -5.96725163542252e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226884070952804, "compression/movement_sparsity/model_sparsity": 0.8909912217900062, "compression_loss": 105.59362030029297, "distillation_loss": 4.919064521789551, "epoch": 4.72, "learning_rate": 2.9341598572367808e-05, "loss": 109.7429, "step": 5582, "task_loss": 3.431931972503662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991685798817559, "compression/movement_sparsity/importance_threshold": -5.91397291899165e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226719875164459, "compression/movement_sparsity/model_sparsity": 0.8909753662742171, "compression_loss": 105.59375762939453, "distillation_loss": 4.733296871185303, "epoch": 4.72, "learning_rate": 2.933690241382549e-05, "loss": 109.8308, "step": 5583, "task_loss": 3.5905094146728516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999176025390625, "compression/movement_sparsity/importance_threshold": -5.861012283503442e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922678462339472, "compression/movement_sparsity/model_sparsity": 0.8909816186671535, "compression_loss": 105.59383392333984, "distillation_loss": 4.736462593078613, "epoch": 4.72, "learning_rate": 2.933220625528318e-05, "loss": 109.4664, "step": 5584, "task_loss": 4.455751895904541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991834263157423, "compression/movement_sparsity/importance_threshold": -5.808368776618125e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227150576099465, "compression/movement_sparsity/model_sparsity": 0.891016956777507, "compression_loss": 105.59392547607422, "distillation_loss": 5.245323181152344, "epoch": 4.72, "learning_rate": 2.9327510096740867e-05, "loss": 109.6367, "step": 5585, "task_loss": 4.07508659362793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991907827909929, "compression/movement_sparsity/importance_threshold": -5.756041445998533e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227349948182337, "compression/movement_sparsity/model_sparsity": 0.8910362090813555, "compression_loss": 105.59402465820312, "distillation_loss": 3.6643033027648926, "epoch": 4.72, "learning_rate": 2.9322813938198557e-05, "loss": 109.8311, "step": 5586, "task_loss": 1.7547056674957275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991980949502621, "compression/movement_sparsity/importance_threshold": -5.704029339304896e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227571737700364, "compression/movement_sparsity/model_sparsity": 0.8910576261179334, "compression_loss": 105.59414672851562, "distillation_loss": 6.0871500968933105, "epoch": 4.72, "learning_rate": 2.931811777965624e-05, "loss": 109.471, "step": 5587, "task_loss": 3.367952585220337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992053629274348, "compression/movement_sparsity/importance_threshold": -5.652331504200914e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227508778095247, "compression/movement_sparsity/model_sparsity": 0.8910515464430339, "compression_loss": 105.59422302246094, "distillation_loss": 3.0385661125183105, "epoch": 4.72, "learning_rate": 2.931342162111393e-05, "loss": 109.5966, "step": 5588, "task_loss": 1.8398969173431396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992125868563962, "compression/movement_sparsity/importance_threshold": -5.600946988347684e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227415173379305, "compression/movement_sparsity/model_sparsity": 0.8910425075324351, "compression_loss": 105.59430694580078, "distillation_loss": 4.551417827606201, "epoch": 4.72, "learning_rate": 2.930872546257162e-05, "loss": 109.217, "step": 5589, "task_loss": 2.4338467121124268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992197668710315, "compression/movement_sparsity/importance_threshold": -5.549874839407172e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227355910266155, "compression/movement_sparsity/model_sparsity": 0.8910367848081452, "compression_loss": 105.59432983398438, "distillation_loss": 3.3765904903411865, "epoch": 4.72, "learning_rate": 2.9304029304029305e-05, "loss": 108.9646, "step": 5590, "task_loss": 1.3154150247573853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992269031052259, "compression/movement_sparsity/importance_threshold": -5.499114105040476e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227502696769753, "compression/movement_sparsity/model_sparsity": 0.8910509592017083, "compression_loss": 105.59440612792969, "distillation_loss": 2.726630687713623, "epoch": 4.73, "learning_rate": 2.9299333145486995e-05, "loss": 108.7463, "step": 5591, "task_loss": 1.723734974861145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992339956928643, "compression/movement_sparsity/importance_threshold": -5.448663832911295e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227809744086376, "compression/movement_sparsity/model_sparsity": 0.8910806091313792, "compression_loss": 105.59437561035156, "distillation_loss": 4.146106719970703, "epoch": 4.73, "learning_rate": 2.9294636986944678e-05, "loss": 109.6291, "step": 5592, "task_loss": 2.3378424644470215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992410447678322, "compression/movement_sparsity/importance_threshold": -5.398523070678993e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228392001192035, "compression/movement_sparsity/model_sparsity": 0.891136834609664, "compression_loss": 105.59440612792969, "distillation_loss": 3.60266375541687, "epoch": 4.73, "learning_rate": 2.9289940828402368e-05, "loss": 109.2084, "step": 5593, "task_loss": 1.6358810663223267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992480504640143, "compression/movement_sparsity/importance_threshold": -5.3486908660081364e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228588749958027, "compression/movement_sparsity/model_sparsity": 0.891155833593725, "compression_loss": 105.59440612792969, "distillation_loss": 3.673536777496338, "epoch": 4.73, "learning_rate": 2.9285244669860057e-05, "loss": 109.2136, "step": 5594, "task_loss": 1.6721880435943604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992550129152962, "compression/movement_sparsity/importance_threshold": -5.299166266558089e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228927515560562, "compression/movement_sparsity/model_sparsity": 0.8911885463899173, "compression_loss": 105.59442901611328, "distillation_loss": 4.4172563552856445, "epoch": 4.73, "learning_rate": 2.9280548511317747e-05, "loss": 109.4181, "step": 5595, "task_loss": 2.9094643592834473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992619322555627, "compression/movement_sparsity/importance_threshold": -5.249948319992551e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228935266269526, "compression/movement_sparsity/model_sparsity": 0.8911892948347439, "compression_loss": 105.59445190429688, "distillation_loss": 4.234524726867676, "epoch": 4.73, "learning_rate": 2.927585235277543e-05, "loss": 110.0551, "step": 5596, "task_loss": 2.562769651412964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999268808618699, "compression/movement_sparsity/importance_threshold": -5.201036073972619e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228404163843024, "compression/movement_sparsity/model_sparsity": 0.891138009092315, "compression_loss": 105.594482421875, "distillation_loss": 3.810629367828369, "epoch": 4.73, "learning_rate": 2.9271156194233117e-05, "loss": 110.289, "step": 5597, "task_loss": 3.042508840560913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992756421385904, "compression/movement_sparsity/importance_threshold": -5.152428576159392e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228654452121701, "compression/movement_sparsity/model_sparsity": 0.8911621781029477, "compression_loss": 105.59452819824219, "distillation_loss": 3.81217622756958, "epoch": 4.73, "learning_rate": 2.9266460035690806e-05, "loss": 109.6352, "step": 5598, "task_loss": 3.187009572982788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999282432949122, "compression/movement_sparsity/importance_threshold": -5.104124874215701e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230069373853372, "compression/movement_sparsity/model_sparsity": 0.8912988095846858, "compression_loss": 105.59464263916016, "distillation_loss": 4.167145252227783, "epoch": 4.73, "learning_rate": 2.9261763877148496e-05, "loss": 109.9234, "step": 5599, "task_loss": 2.1717355251312256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992891811841788, "compression/movement_sparsity/importance_threshold": -5.056124015804381e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229992462972121, "compression/movement_sparsity/model_sparsity": 0.8912913827090982, "compression_loss": 105.59467315673828, "distillation_loss": 4.451948165893555, "epoch": 4.73, "learning_rate": 2.925706771860618e-05, "loss": 109.9516, "step": 5600, "task_loss": 2.9749512672424316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999295886977646, "compression/movement_sparsity/importance_threshold": -5.008425048584793e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229671345137687, "compression/movement_sparsity/model_sparsity": 0.8912603740642036, "compression_loss": 105.5947494506836, "distillation_loss": 4.695528030395508, "epoch": 4.73, "learning_rate": 2.925237156006387e-05, "loss": 109.5495, "step": 5601, "task_loss": 3.6528375148773193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993025504634088, "compression/movement_sparsity/importance_threshold": -4.9610270202206375e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923004159054278, "compression/movement_sparsity/model_sparsity": 0.8912961266978456, "compression_loss": 105.59481811523438, "distillation_loss": 5.4674224853515625, "epoch": 4.73, "learning_rate": 2.924767540152156e-05, "loss": 109.5259, "step": 5602, "task_loss": 1.768169641494751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993091717753523, "compression/movement_sparsity/importance_threshold": -4.9139289783730125e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229694120297871, "compression/movement_sparsity/model_sparsity": 0.8912625733405404, "compression_loss": 105.59494018554688, "distillation_loss": 5.463784694671631, "epoch": 4.74, "learning_rate": 2.9242979242979245e-05, "loss": 110.7027, "step": 5603, "task_loss": 2.525857448577881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993157510473617, "compression/movement_sparsity/importance_threshold": -4.867129970703016e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230296648488512, "compression/movement_sparsity/model_sparsity": 0.8913207562899101, "compression_loss": 105.5949935913086, "distillation_loss": 4.325183391571045, "epoch": 4.74, "learning_rate": 2.9238283084436928e-05, "loss": 109.7729, "step": 5604, "task_loss": 2.1610066890716553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999322288413322, "compression/movement_sparsity/importance_threshold": -4.820629044874347e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230339217766972, "compression/movement_sparsity/model_sparsity": 0.8913248669791888, "compression_loss": 105.59507751464844, "distillation_loss": 3.5139341354370117, "epoch": 4.74, "learning_rate": 2.9233586925894617e-05, "loss": 109.3365, "step": 5605, "task_loss": 2.0569570064544678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993287840071184, "compression/movement_sparsity/importance_threshold": -4.7744252485481045e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230097157163963, "compression/movement_sparsity/model_sparsity": 0.8913014924715259, "compression_loss": 105.59513092041016, "distillation_loss": 3.8185741901397705, "epoch": 4.74, "learning_rate": 2.9228890767352307e-05, "loss": 109.4896, "step": 5606, "task_loss": 2.2358760833740234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993352379626362, "compression/movement_sparsity/importance_threshold": -4.728517629385386e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230004029414728, "compression/movement_sparsity/model_sparsity": 0.8912924996190704, "compression_loss": 105.59516143798828, "distillation_loss": 3.722062587738037, "epoch": 4.74, "learning_rate": 2.9224194608809997e-05, "loss": 109.1769, "step": 5607, "task_loss": 2.3034584522247314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993416504137603, "compression/movement_sparsity/importance_threshold": -4.68290523504989e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230216279598645, "compression/movement_sparsity/model_sparsity": 0.8913129954927846, "compression_loss": 105.59525299072266, "distillation_loss": 4.388940811157227, "epoch": 4.74, "learning_rate": 2.9219498450267687e-05, "loss": 109.3216, "step": 5608, "task_loss": 2.8706541061401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993480214943761, "compression/movement_sparsity/importance_threshold": -4.637587113200982e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230134479808664, "compression/movement_sparsity/model_sparsity": 0.8913050965212296, "compression_loss": 105.59528350830078, "distillation_loss": 3.3413963317871094, "epoch": 4.74, "learning_rate": 2.921480229172537e-05, "loss": 109.4047, "step": 5609, "task_loss": 1.530013918876648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993543513383685, "compression/movement_sparsity/importance_threshold": -4.592562311501493e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230193981405167, "compression/movement_sparsity/model_sparsity": 0.8913108422745911, "compression_loss": 105.59530639648438, "distillation_loss": 3.3174548149108887, "epoch": 4.74, "learning_rate": 2.9210106133183056e-05, "loss": 109.3122, "step": 5610, "task_loss": 2.397221088409424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993606400796227, "compression/movement_sparsity/importance_threshold": -4.547829877614255e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229917340716015, "compression/movement_sparsity/model_sparsity": 0.8912841285515477, "compression_loss": 105.5953598022461, "distillation_loss": 4.433612823486328, "epoch": 4.74, "learning_rate": 2.9205409974640746e-05, "loss": 108.9125, "step": 5611, "task_loss": 2.8121728897094727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993668878520238, "compression/movement_sparsity/importance_threshold": -4.503388859201235e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230238220067096, "compression/movement_sparsity/model_sparsity": 0.8913151141673709, "compression_loss": 105.59539031982422, "distillation_loss": 4.191044330596924, "epoch": 4.74, "learning_rate": 2.9200713816098435e-05, "loss": 109.5013, "step": 5612, "task_loss": 2.751051664352417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993730947894571, "compression/movement_sparsity/importance_threshold": -4.459238303921795e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230173710320185, "compression/movement_sparsity/model_sparsity": 0.891308884803506, "compression_loss": 105.59541320800781, "distillation_loss": 4.524690628051758, "epoch": 4.74, "learning_rate": 2.9196017657556118e-05, "loss": 109.4429, "step": 5613, "task_loss": 2.73572039604187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993792610258077, "compression/movement_sparsity/importance_threshold": -4.415377259440502e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230478492044959, "compression/movement_sparsity/model_sparsity": 0.8913383159569969, "compression_loss": 105.59542846679688, "distillation_loss": 5.300890922546387, "epoch": 4.75, "learning_rate": 2.9191321499013808e-05, "loss": 110.4171, "step": 5614, "task_loss": 2.699972629547119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993853866949606, "compression/movement_sparsity/importance_threshold": -4.371804773417587e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230478849769987, "compression/movement_sparsity/model_sparsity": 0.8913383505006043, "compression_loss": 105.59541320800781, "distillation_loss": 4.461864471435547, "epoch": 4.75, "learning_rate": 2.9186625340471498e-05, "loss": 109.9534, "step": 5615, "task_loss": 3.6086959838867188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999391471930801, "compression/movement_sparsity/importance_threshold": -4.328519893515882e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230271250011447, "compression/movement_sparsity/model_sparsity": 0.891318303693786, "compression_loss": 105.59545135498047, "distillation_loss": 3.668200969696045, "epoch": 4.75, "learning_rate": 2.9181929181929184e-05, "loss": 109.4683, "step": 5616, "task_loss": 2.118414878845215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993975168672142, "compression/movement_sparsity/importance_threshold": -4.285521667396486e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230357342501778, "compression/movement_sparsity/model_sparsity": 0.8913266171886296, "compression_loss": 105.595458984375, "distillation_loss": 4.201402187347412, "epoch": 4.75, "learning_rate": 2.9177233023386867e-05, "loss": 110.273, "step": 5617, "task_loss": 2.2482974529266357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994035216380851, "compression/movement_sparsity/importance_threshold": -4.24280914272223e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230642091624922, "compression/movement_sparsity/model_sparsity": 0.891354113900107, "compression_loss": 105.59546661376953, "distillation_loss": 4.8264570236206055, "epoch": 4.75, "learning_rate": 2.9172536864844557e-05, "loss": 109.6855, "step": 5618, "task_loss": 2.1224236488342285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994094863772991, "compression/movement_sparsity/importance_threshold": -4.2003813671533455e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230737604207686, "compression/movement_sparsity/model_sparsity": 0.8913633370432784, "compression_loss": 105.59548950195312, "distillation_loss": 3.561445713043213, "epoch": 4.75, "learning_rate": 2.9167840706302246e-05, "loss": 109.8813, "step": 5619, "task_loss": 2.4546115398406982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999415411218741, "compression/movement_sparsity/importance_threshold": -4.158237388353532e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230781723627939, "compression/movement_sparsity/model_sparsity": 0.8913675974215224, "compression_loss": 105.59547424316406, "distillation_loss": 3.212094783782959, "epoch": 4.75, "learning_rate": 2.9163144547759936e-05, "loss": 109.4867, "step": 5620, "task_loss": 2.044743299484253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994212962962963, "compression/movement_sparsity/importance_threshold": -4.11637625398302e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230601072488255, "compression/movement_sparsity/model_sparsity": 0.8913501528997937, "compression_loss": 105.5954818725586, "distillation_loss": 4.381831169128418, "epoch": 4.75, "learning_rate": 2.9158448389217623e-05, "loss": 109.6541, "step": 5621, "task_loss": 1.6181261539459229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99942714174385, "compression/movement_sparsity/importance_threshold": -4.074797011704642e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231071719384842, "compression/movement_sparsity/model_sparsity": 0.8913956007725747, "compression_loss": 105.59550476074219, "distillation_loss": 4.147646903991699, "epoch": 4.75, "learning_rate": 2.915375223067531e-05, "loss": 109.6488, "step": 5622, "task_loss": 2.7751691341400146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999432947695287, "compression/movement_sparsity/importance_threshold": -4.033498709180364e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231165443342461, "compression/movement_sparsity/model_sparsity": 0.8914046511977092, "compression_loss": 105.59548950195312, "distillation_loss": 3.9569664001464844, "epoch": 4.75, "learning_rate": 2.9149056072132995e-05, "loss": 109.067, "step": 5623, "task_loss": 2.0581822395324707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994387142844928, "compression/movement_sparsity/importance_threshold": -3.992480394071282e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923099647788706, "compression/movement_sparsity/model_sparsity": 0.8913883351004883, "compression_loss": 105.595458984375, "distillation_loss": 3.1725564002990723, "epoch": 4.75, "learning_rate": 2.9144359913590685e-05, "loss": 109.2262, "step": 5624, "task_loss": 1.793503761291504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994444416453524, "compression/movement_sparsity/importance_threshold": -3.95174111404023e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231342397990177, "compression/movement_sparsity/model_sparsity": 0.8914217387688282, "compression_loss": 105.59546661376953, "distillation_loss": 4.747411727905273, "epoch": 4.75, "learning_rate": 2.9139663755048375e-05, "loss": 109.6433, "step": 5625, "task_loss": 2.768575429916382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999450129911751, "compression/movement_sparsity/importance_threshold": -3.911279916748306e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231626670146615, "compression/movement_sparsity/model_sparsity": 0.8914491894221624, "compression_loss": 105.59541320800781, "distillation_loss": 3.8335840702056885, "epoch": 4.76, "learning_rate": 2.9134967596506058e-05, "loss": 109.3032, "step": 5626, "task_loss": 1.6947163343429565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994557792175736, "compression/movement_sparsity/importance_threshold": -3.871095849857474e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231856210373606, "compression/movement_sparsity/model_sparsity": 0.891471354903567, "compression_loss": 105.59539031982422, "distillation_loss": 3.6503806114196777, "epoch": 4.76, "learning_rate": 2.9130271437963747e-05, "loss": 109.0221, "step": 5627, "task_loss": 2.40445876121521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994613896967054, "compression/movement_sparsity/importance_threshold": -3.831187961028833e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232072395532844, "compression/movement_sparsity/model_sparsity": 0.8914922307569625, "compression_loss": 105.59532165527344, "distillation_loss": 5.182345867156982, "epoch": 4.76, "learning_rate": 2.9125575279421434e-05, "loss": 109.978, "step": 5628, "task_loss": 2.393442153930664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994669614830316, "compression/movement_sparsity/importance_threshold": -3.7915552979260816e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232223355495114, "compression/movement_sparsity/model_sparsity": 0.8915068081592784, "compression_loss": 105.59522247314453, "distillation_loss": 3.4096109867095947, "epoch": 4.76, "learning_rate": 2.9120879120879123e-05, "loss": 109.5878, "step": 5629, "task_loss": 3.159416675567627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994724947104373, "compression/movement_sparsity/importance_threshold": -3.7521969082094514e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232880257890174, "compression/movement_sparsity/model_sparsity": 0.8915702417369705, "compression_loss": 105.59512329101562, "distillation_loss": 4.368607997894287, "epoch": 4.76, "learning_rate": 2.9116182962336806e-05, "loss": 110.0433, "step": 5630, "task_loss": 3.4886624813079834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994779895128076, "compression/movement_sparsity/importance_threshold": -3.7131118395417745e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232241599471597, "compression/movement_sparsity/model_sparsity": 0.8915085698832549, "compression_loss": 105.59504699707031, "distillation_loss": 5.1035566329956055, "epoch": 4.76, "learning_rate": 2.9111486803794496e-05, "loss": 109.8164, "step": 5631, "task_loss": 2.4911258220672607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994834460240277, "compression/movement_sparsity/importance_threshold": -3.6742991395841487e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231967701341002, "compression/movement_sparsity/model_sparsity": 0.8914821209945348, "compression_loss": 105.59493255615234, "distillation_loss": 3.635938882827759, "epoch": 4.76, "learning_rate": 2.9106790645252186e-05, "loss": 109.3289, "step": 5632, "task_loss": 2.4277255535125732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994888643779828, "compression/movement_sparsity/importance_threshold": -3.6357578559994067e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232088493159153, "compression/movement_sparsity/model_sparsity": 0.8914937852192948, "compression_loss": 105.59486389160156, "distillation_loss": 4.438970565795898, "epoch": 4.76, "learning_rate": 2.9102094486709876e-05, "loss": 109.6702, "step": 5633, "task_loss": 2.2240822315216064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999494244708558, "compression/movement_sparsity/importance_threshold": -3.5974870364477787e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232002162185469, "compression/movement_sparsity/model_sparsity": 0.8914854486953795, "compression_loss": 105.59474182128906, "distillation_loss": 2.9462890625, "epoch": 4.76, "learning_rate": 2.909739832816756e-05, "loss": 109.0683, "step": 5634, "task_loss": 2.430166244506836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994995871496383, "compression/movement_sparsity/importance_threshold": -3.559485728592965e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232023864170567, "compression/movement_sparsity/model_sparsity": 0.8914875443408941, "compression_loss": 105.59465026855469, "distillation_loss": 4.254531383514404, "epoch": 4.76, "learning_rate": 2.9092702169625248e-05, "loss": 110.4902, "step": 5635, "task_loss": 2.5196287631988525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995048918351089, "compression/movement_sparsity/importance_threshold": -3.5217529800951955e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232294542775901, "compression/movement_sparsity/model_sparsity": 0.8915136823371477, "compression_loss": 105.59455871582031, "distillation_loss": 4.860739707946777, "epoch": 4.76, "learning_rate": 2.9088006011082935e-05, "loss": 109.7332, "step": 5636, "task_loss": 2.210709571838379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995101588988551, "compression/movement_sparsity/importance_threshold": -3.4842878386181705e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232520267269247, "compression/movement_sparsity/model_sparsity": 0.8915354793534068, "compression_loss": 105.59444427490234, "distillation_loss": 3.9420857429504395, "epoch": 4.76, "learning_rate": 2.9083309852540624e-05, "loss": 109.5539, "step": 5637, "task_loss": 1.9496756792068481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995153884747618, "compression/movement_sparsity/importance_threshold": -3.4470893518221205e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232867141305775, "compression/movement_sparsity/model_sparsity": 0.8915689751380331, "compression_loss": 105.59432983398438, "distillation_loss": 2.0521254539489746, "epoch": 4.77, "learning_rate": 2.9078613693998314e-05, "loss": 109.174, "step": 5638, "task_loss": 1.169487714767456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995205806967143, "compression/movement_sparsity/importance_threshold": -3.4101565673698778e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233128519060353, "compression/movement_sparsity/model_sparsity": 0.8915942150004947, "compression_loss": 105.5942153930664, "distillation_loss": 4.776771545410156, "epoch": 4.77, "learning_rate": 2.9073917535455997e-05, "loss": 110.2346, "step": 5639, "task_loss": 2.464993953704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995257356985978, "compression/movement_sparsity/importance_threshold": -3.373488532921673e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233492325414923, "compression/movement_sparsity/model_sparsity": 0.8916293458492038, "compression_loss": 105.59412384033203, "distillation_loss": 3.642766237258911, "epoch": 4.77, "learning_rate": 2.9069221376913687e-05, "loss": 109.6941, "step": 5640, "task_loss": 2.985211133956909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995308536142973, "compression/movement_sparsity/importance_threshold": -3.3370842961420732e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233320259675938, "compression/movement_sparsity/model_sparsity": 0.8916127303740523, "compression_loss": 105.59396362304688, "distillation_loss": 5.534060478210449, "epoch": 4.77, "learning_rate": 2.9064525218371373e-05, "loss": 109.9013, "step": 5641, "task_loss": 2.77545428276062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999535934577698, "compression/movement_sparsity/importance_threshold": -3.3009429046904418e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234142073309402, "compression/movement_sparsity/model_sparsity": 0.8916920885547484, "compression_loss": 105.5938720703125, "distillation_loss": 3.448850631713867, "epoch": 4.77, "learning_rate": 2.9059829059829063e-05, "loss": 109.4608, "step": 5642, "task_loss": 1.4353413581848145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999540978722685, "compression/movement_sparsity/importance_threshold": -3.265063406229611e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233681800438657, "compression/movement_sparsity/model_sparsity": 0.8916476424465813, "compression_loss": 105.59375762939453, "distillation_loss": 3.6095166206359863, "epoch": 4.77, "learning_rate": 2.9055132901286746e-05, "loss": 109.7091, "step": 5643, "task_loss": 2.6888864040374756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995459861831435, "compression/movement_sparsity/importance_threshold": -3.2294448484215463e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233395978140425, "compression/movement_sparsity/model_sparsity": 0.8916200421042819, "compression_loss": 105.59362030029297, "distillation_loss": 4.494846820831299, "epoch": 4.77, "learning_rate": 2.9050436742744435e-05, "loss": 109.1673, "step": 5644, "task_loss": 3.707651376724243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995509570929586, "compression/movement_sparsity/importance_threshold": -3.1940862789282126e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233313462900385, "compression/movement_sparsity/model_sparsity": 0.891612074045512, "compression_loss": 105.59349822998047, "distillation_loss": 3.6954407691955566, "epoch": 4.77, "learning_rate": 2.9045740584202125e-05, "loss": 109.2076, "step": 5645, "task_loss": 1.8971306085586548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995558915860154, "compression/movement_sparsity/importance_threshold": -3.158986745410708e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233295218923903, "compression/movement_sparsity/model_sparsity": 0.8916103123215355, "compression_loss": 105.59339141845703, "distillation_loss": 3.2369766235351562, "epoch": 4.77, "learning_rate": 2.9041044425659815e-05, "loss": 109.8743, "step": 5646, "task_loss": 1.513222098350525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995607897961991, "compression/movement_sparsity/importance_threshold": -3.124145295532732e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232625915394501, "compression/movement_sparsity/model_sparsity": 0.8915456812321207, "compression_loss": 105.59329223632812, "distillation_loss": 4.178921699523926, "epoch": 4.77, "learning_rate": 2.9036348267117498e-05, "loss": 109.8048, "step": 5647, "task_loss": 2.5056655406951904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995656518573949, "compression/movement_sparsity/importance_threshold": -3.089560976953648e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232662761072495, "compression/movement_sparsity/model_sparsity": 0.8915492392236812, "compression_loss": 105.59317016601562, "distillation_loss": 3.5157508850097656, "epoch": 4.77, "learning_rate": 2.9031652108575184e-05, "loss": 108.7803, "step": 5648, "task_loss": 2.3931984901428223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995704779034879, "compression/movement_sparsity/importance_threshold": -3.0552328373362886e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232725601435937, "compression/movement_sparsity/model_sparsity": 0.891555307384045, "compression_loss": 105.5929946899414, "distillation_loss": 3.1907923221588135, "epoch": 4.77, "learning_rate": 2.9026955950032874e-05, "loss": 109.2833, "step": 5649, "task_loss": 1.580336093902588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995752680683632, "compression/movement_sparsity/importance_threshold": -3.0211599243426188e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233016908851281, "compression/movement_sparsity/model_sparsity": 0.891583437394991, "compression_loss": 105.59283447265625, "distillation_loss": 3.436213970184326, "epoch": 4.78, "learning_rate": 2.9022259791490564e-05, "loss": 108.3976, "step": 5650, "task_loss": 1.114706039428711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995800224859058, "compression/movement_sparsity/importance_threshold": -2.987341285635471e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233805215573688, "compression/movement_sparsity/model_sparsity": 0.8916595599911288, "compression_loss": 105.59259796142578, "distillation_loss": 4.500936985015869, "epoch": 4.78, "learning_rate": 2.9017563632948247e-05, "loss": 110.0624, "step": 5651, "task_loss": 1.7042773962020874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995847412900012, "compression/movement_sparsity/importance_threshold": -2.953775968875076e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233636965568346, "compression/movement_sparsity/model_sparsity": 0.8916433129811226, "compression_loss": 105.59243774414062, "distillation_loss": 3.972412586212158, "epoch": 4.78, "learning_rate": 2.9012867474405936e-05, "loss": 109.19, "step": 5652, "task_loss": 2.211865186691284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995894246145341, "compression/movement_sparsity/importance_threshold": -2.9204630217242664e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234621901815068, "compression/movement_sparsity/model_sparsity": 0.8917384230467855, "compression_loss": 105.59221649169922, "distillation_loss": 3.1871862411499023, "epoch": 4.78, "learning_rate": 2.9008171315863626e-05, "loss": 109.2552, "step": 5653, "task_loss": 2.534388780593872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995940725933901, "compression/movement_sparsity/importance_threshold": -2.887401491845007e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92343372719336, "compression/movement_sparsity/model_sparsity": 0.891710937849844, "compression_loss": 105.59204864501953, "distillation_loss": 3.4078369140625, "epoch": 4.78, "learning_rate": 2.9003475157321312e-05, "loss": 109.543, "step": 5654, "task_loss": 1.1887308359146118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999598685360454, "compression/movement_sparsity/importance_threshold": -2.8545904268975283e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234555603443013, "compression/movement_sparsity/model_sparsity": 0.8917320209648838, "compression_loss": 105.59182739257812, "distillation_loss": 5.036890506744385, "epoch": 4.78, "learning_rate": 2.8998778998779002e-05, "loss": 109.8181, "step": 5655, "task_loss": 2.609529495239258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996032630496111, "compression/movement_sparsity/importance_threshold": -2.8220288745455305e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234761891543113, "compression/movement_sparsity/model_sparsity": 0.8917519411118083, "compression_loss": 105.5916519165039, "distillation_loss": 4.7924699783325195, "epoch": 4.78, "learning_rate": 2.8994082840236685e-05, "loss": 109.4194, "step": 5656, "task_loss": 2.297187089920044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996078057947465, "compression/movement_sparsity/importance_threshold": -2.7897158824509785e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923502004977243, "compression/movement_sparsity/model_sparsity": 0.8917768700818035, "compression_loss": 105.59140014648438, "distillation_loss": 3.319608688354492, "epoch": 4.78, "learning_rate": 2.8989386681694375e-05, "loss": 109.513, "step": 5657, "task_loss": 2.335585355758667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996123137297453, "compression/movement_sparsity/importance_threshold": -2.757650498274103e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235383736885323, "compression/movement_sparsity/model_sparsity": 0.8918119894159768, "compression_loss": 105.59123229980469, "distillation_loss": 2.5052177906036377, "epoch": 4.78, "learning_rate": 2.8984690523152065e-05, "loss": 109.3687, "step": 5658, "task_loss": 1.3223247528076172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996167869884928, "compression/movement_sparsity/importance_threshold": -2.7258317696777362e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235288343544237, "compression/movement_sparsity/model_sparsity": 0.8918027777873413, "compression_loss": 105.59101104736328, "distillation_loss": 4.717195510864258, "epoch": 4.78, "learning_rate": 2.8979994364609754e-05, "loss": 109.1909, "step": 5659, "task_loss": 1.7128852605819702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996212257048739, "compression/movement_sparsity/importance_threshold": -2.6942587443238436e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235593482994038, "compression/movement_sparsity/model_sparsity": 0.8918322434844395, "compression_loss": 105.59085083007812, "distillation_loss": 4.351246356964111, "epoch": 4.78, "learning_rate": 2.8975298206067437e-05, "loss": 109.2514, "step": 5660, "task_loss": 1.8092553615570068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996256300127739, "compression/movement_sparsity/importance_threshold": -2.662930469873523e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235111388896521, "compression/movement_sparsity/model_sparsity": 0.8917856902162221, "compression_loss": 105.59058380126953, "distillation_loss": 3.011598825454712, "epoch": 4.78, "learning_rate": 2.8970602047525124e-05, "loss": 109.2675, "step": 5661, "task_loss": 1.384644865989685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996300000460779, "compression/movement_sparsity/importance_threshold": -2.6318459939896066e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235252571041329, "compression/movement_sparsity/model_sparsity": 0.8917993234266028, "compression_loss": 105.59040069580078, "distillation_loss": 4.71213436126709, "epoch": 4.79, "learning_rate": 2.8965905888982813e-05, "loss": 110.8348, "step": 5662, "task_loss": 1.8791306018829346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999634335938671, "compression/movement_sparsity/importance_threshold": -2.60100436433406e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923543823033142, "compression/movement_sparsity/model_sparsity": 0.891817251558835, "compression_loss": 105.59024047851562, "distillation_loss": 4.9619011878967285, "epoch": 4.79, "learning_rate": 2.8961209730440503e-05, "loss": 109.8832, "step": 5663, "task_loss": 2.425048589706421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996386378244385, "compression/movement_sparsity/importance_threshold": -2.5704046285671134e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235409135362388, "compression/movement_sparsity/model_sparsity": 0.8918144420121011, "compression_loss": 105.59011840820312, "distillation_loss": 4.531918048858643, "epoch": 4.79, "learning_rate": 2.8956513571898186e-05, "loss": 110.0297, "step": 5664, "task_loss": 2.5540988445281982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996429058372653, "compression/movement_sparsity/importance_threshold": -2.5400458343524668e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235100418662295, "compression/movement_sparsity/model_sparsity": 0.891784630878929, "compression_loss": 105.58992004394531, "distillation_loss": 2.8323559761047363, "epoch": 4.79, "learning_rate": 2.8951817413355876e-05, "loss": 109.5181, "step": 5665, "task_loss": 1.4230221509933472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996471401110367, "compression/movement_sparsity/importance_threshold": -2.509927029351218e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235968140341158, "compression/movement_sparsity/model_sparsity": 0.8918684221559059, "compression_loss": 105.58977508544922, "distillation_loss": 5.515476703643799, "epoch": 4.79, "learning_rate": 2.8947121254813565e-05, "loss": 109.7155, "step": 5666, "task_loss": 2.911625385284424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996513407796378, "compression/movement_sparsity/importance_threshold": -2.480047261225332e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236320261011444, "compression/movement_sparsity/model_sparsity": 0.8919024245801072, "compression_loss": 105.58963012695312, "distillation_loss": 3.8019843101501465, "epoch": 4.79, "learning_rate": 2.8942425096271252e-05, "loss": 109.7141, "step": 5667, "task_loss": 1.8915163278579712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996555079769538, "compression/movement_sparsity/importance_threshold": -2.450405577635907e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9237103559583444, "compression/movement_sparsity/model_sparsity": 0.8919780635657415, "compression_loss": 105.58944702148438, "distillation_loss": 5.814002990722656, "epoch": 4.79, "learning_rate": 2.893772893772894e-05, "loss": 110.0979, "step": 5668, "task_loss": 2.691814422607422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996596418368697, "compression/movement_sparsity/importance_threshold": -2.4210010262457754e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9237044534953647, "compression/movement_sparsity/model_sparsity": 0.8919723638705233, "compression_loss": 105.58927154541016, "distillation_loss": 4.005598068237305, "epoch": 4.79, "learning_rate": 2.8933032779186624e-05, "loss": 110.1702, "step": 5669, "task_loss": 2.1625771522521973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996637424932708, "compression/movement_sparsity/importance_threshold": -2.391832654716035e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9237303647116374, "compression/movement_sparsity/model_sparsity": 0.8919973849568048, "compression_loss": 105.58905792236328, "distillation_loss": 4.696724891662598, "epoch": 4.79, "learning_rate": 2.8928336620644314e-05, "loss": 109.6029, "step": 5670, "task_loss": 1.9518624544143677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996678100800421, "compression/movement_sparsity/importance_threshold": -2.362899510709518e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9237589350172929, "compression/movement_sparsity/model_sparsity": 0.8920249737845685, "compression_loss": 105.58890533447266, "distillation_loss": 3.7495694160461426, "epoch": 4.79, "learning_rate": 2.8923640462102004e-05, "loss": 109.2726, "step": 5671, "task_loss": 3.1921303272247314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996718447310688, "compression/movement_sparsity/importance_threshold": -2.3342006418873226e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9238293114546797, "compression/movement_sparsity/model_sparsity": 0.8920929325748279, "compression_loss": 105.58869934082031, "distillation_loss": 2.6067862510681152, "epoch": 4.79, "learning_rate": 2.891894430355969e-05, "loss": 108.7041, "step": 5672, "task_loss": 2.5720138549804688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996758465802362, "compression/movement_sparsity/importance_threshold": -2.3057350959105466e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9238496183121636, "compression/movement_sparsity/model_sparsity": 0.892112541829286, "compression_loss": 105.5885238647461, "distillation_loss": 3.054180860519409, "epoch": 4.79, "learning_rate": 2.8914248145017377e-05, "loss": 109.63, "step": 5673, "task_loss": 2.485543727874756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996798157614292, "compression/movement_sparsity/importance_threshold": -2.27750192044289e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9238721192164925, "compression/movement_sparsity/model_sparsity": 0.8921342697583303, "compression_loss": 105.5882797241211, "distillation_loss": 4.995856285095215, "epoch": 4.8, "learning_rate": 2.8909551986475063e-05, "loss": 109.6446, "step": 5674, "task_loss": 2.46877121925354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999683752408533, "compression/movement_sparsity/importance_threshold": -2.2495001631445827e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239219383888752, "compression/movement_sparsity/model_sparsity": 0.8921823774888799, "compression_loss": 105.58799743652344, "distillation_loss": 3.9778873920440674, "epoch": 4.8, "learning_rate": 2.8904855827932753e-05, "loss": 109.3769, "step": 5675, "task_loss": 2.864292621612549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996876566554328, "compression/movement_sparsity/importance_threshold": -2.221728871679325e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239171567976532, "compression/movement_sparsity/model_sparsity": 0.8921777601600263, "compression_loss": 105.58771514892578, "distillation_loss": 2.5176913738250732, "epoch": 4.8, "learning_rate": 2.8900159669390442e-05, "loss": 109.4737, "step": 5676, "task_loss": 0.6515865325927734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996915286360136, "compression/movement_sparsity/importance_threshold": -2.1941870937073477e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239320023863599, "compression/movement_sparsity/model_sparsity": 0.8921920957570905, "compression_loss": 105.58747863769531, "distillation_loss": 5.176164627075195, "epoch": 4.8, "learning_rate": 2.8895463510848125e-05, "loss": 109.1766, "step": 5677, "task_loss": 3.2427773475646973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996953684841609, "compression/movement_sparsity/importance_threshold": -2.1668738768906154e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9238633549532802, "compression/movement_sparsity/model_sparsity": 0.8921258065745213, "compression_loss": 105.58726501464844, "distillation_loss": 4.883923530578613, "epoch": 4.8, "learning_rate": 2.8890767352305815e-05, "loss": 109.8183, "step": 5678, "task_loss": 3.7837846279144287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996991763337594, "compression/movement_sparsity/importance_threshold": -2.1397882688910935e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923901512289715, "compression/movement_sparsity/model_sparsity": 0.8921626530890638, "compression_loss": 105.58695220947266, "distillation_loss": 3.678581714630127, "epoch": 4.8, "learning_rate": 2.8886071193763505e-05, "loss": 109.1675, "step": 5679, "task_loss": 2.2822413444519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997029523186944, "compression/movement_sparsity/importance_threshold": -2.1129293173716146e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9238682677103461, "compression/movement_sparsity/model_sparsity": 0.8921305505632686, "compression_loss": 105.5866928100586, "distillation_loss": 3.6733479499816895, "epoch": 4.8, "learning_rate": 2.888137503522119e-05, "loss": 108.8303, "step": 5680, "task_loss": 2.2830872535705566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997066965728512, "compression/movement_sparsity/importance_threshold": -2.0862960699932764e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9238836379624288, "compression/movement_sparsity/model_sparsity": 0.8921453927999078, "compression_loss": 105.58638763427734, "distillation_loss": 5.193612098693848, "epoch": 4.8, "learning_rate": 2.8876678876678874e-05, "loss": 109.6532, "step": 5681, "task_loss": 3.197232723236084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997104092301147, "compression/movement_sparsity/importance_threshold": -2.059887574417177e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239071047243361, "compression/movement_sparsity/model_sparsity": 0.8921680534063514, "compression_loss": 105.58607482910156, "distillation_loss": 5.661660194396973, "epoch": 4.8, "learning_rate": 2.8871982718136564e-05, "loss": 109.8416, "step": 5682, "task_loss": 2.7051055431365967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997140904243702, "compression/movement_sparsity/importance_threshold": -2.0337028783070157e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239349476557659, "compression/movement_sparsity/model_sparsity": 0.8921949398474317, "compression_loss": 105.5857925415039, "distillation_loss": 3.843900203704834, "epoch": 4.8, "learning_rate": 2.8867286559594254e-05, "loss": 109.8439, "step": 5683, "task_loss": 2.192274570465088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997177402895029, "compression/movement_sparsity/importance_threshold": -2.007741029322156e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923850226444713, "compression/movement_sparsity/model_sparsity": 0.8921131290706115, "compression_loss": 105.58551025390625, "distillation_loss": 5.313838005065918, "epoch": 4.8, "learning_rate": 2.8862590401051943e-05, "loss": 109.6095, "step": 5684, "task_loss": 2.897930860519409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997213589593976, "compression/movement_sparsity/importance_threshold": -1.9820010751271655e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239206148062675, "compression/movement_sparsity/model_sparsity": 0.8921810993754067, "compression_loss": 105.58521270751953, "distillation_loss": 4.0972795486450195, "epoch": 4.81, "learning_rate": 2.885789424250963e-05, "loss": 109.5899, "step": 5685, "task_loss": 2.744323253631592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997249465679399, "compression/movement_sparsity/importance_threshold": -1.9564820633814067e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923930786121261, "compression/movement_sparsity/model_sparsity": 0.8921909212744394, "compression_loss": 105.5849609375, "distillation_loss": 5.196528434753418, "epoch": 4.81, "learning_rate": 2.8853198083967316e-05, "loss": 109.6324, "step": 5686, "task_loss": 2.3246541023254395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997285032490146, "compression/movement_sparsity/importance_threshold": -1.93118304174858e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239328490022619, "compression/movement_sparsity/model_sparsity": 0.8921929132891319, "compression_loss": 105.58465576171875, "distillation_loss": 4.453872203826904, "epoch": 4.81, "learning_rate": 2.8848501925425002e-05, "loss": 109.1517, "step": 5687, "task_loss": 2.869753837585449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999732029136507, "compression/movement_sparsity/importance_threshold": -1.9061030578897825e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923921616436349, "compression/movement_sparsity/model_sparsity": 0.8921820665964134, "compression_loss": 105.58441925048828, "distillation_loss": 5.514354705810547, "epoch": 4.81, "learning_rate": 2.8843805766882692e-05, "loss": 109.9694, "step": 5688, "task_loss": 2.9953551292419434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997355243643022, "compression/movement_sparsity/importance_threshold": -1.88124115946698e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9239935310913608, "compression/movement_sparsity/model_sparsity": 0.8922515107617903, "compression_loss": 105.58411407470703, "distillation_loss": 4.745085716247559, "epoch": 4.81, "learning_rate": 2.8839109608340382e-05, "loss": 109.5886, "step": 5689, "task_loss": 4.832146644592285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997389890662853, "compression/movement_sparsity/importance_threshold": -1.8565963941421376e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9240552982797147, "compression/movement_sparsity/model_sparsity": 0.892311156057206, "compression_loss": 105.5838851928711, "distillation_loss": 2.2397568225860596, "epoch": 4.81, "learning_rate": 2.8834413449798065e-05, "loss": 109.2728, "step": 5690, "task_loss": 1.1919623613357544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997424233763416, "compression/movement_sparsity/importance_threshold": -1.832167809576353e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9240842740070698, "compression/movement_sparsity/model_sparsity": 0.8923391363791867, "compression_loss": 105.58357238769531, "distillation_loss": 2.749793291091919, "epoch": 4.81, "learning_rate": 2.8829717291255754e-05, "loss": 108.9414, "step": 5691, "task_loss": 1.9707024097442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997458274283559, "compression/movement_sparsity/importance_threshold": -1.8079544534333261e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.924142618959312, "compression/movement_sparsity/model_sparsity": 0.8923954770028294, "compression_loss": 105.5832748413086, "distillation_loss": 3.247547149658203, "epoch": 4.81, "learning_rate": 2.882502113271344e-05, "loss": 110.1397, "step": 5692, "task_loss": 3.168815851211548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997492013562138, "compression/movement_sparsity/importance_threshold": -1.78395537337242e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241096247874636, "compression/movement_sparsity/model_sparsity": 0.8923636162822859, "compression_loss": 105.58297729492188, "distillation_loss": 4.742160797119141, "epoch": 4.81, "learning_rate": 2.882032497417113e-05, "loss": 109.8951, "step": 5693, "task_loss": 2.542713165283203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997525452938001, "compression/movement_sparsity/importance_threshold": -1.7601696170573347e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241432270918615, "compression/movement_sparsity/model_sparsity": 0.892396064244155, "compression_loss": 105.58264923095703, "distillation_loss": 3.4105172157287598, "epoch": 4.81, "learning_rate": 2.8815628815628813e-05, "loss": 109.6945, "step": 5694, "task_loss": 2.0642993450164795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999755859375, "compression/movement_sparsity/importance_threshold": -1.736596232149168e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241433344093702, "compression/movement_sparsity/model_sparsity": 0.8923961678749771, "compression_loss": 105.58228302001953, "distillation_loss": 4.258639335632324, "epoch": 4.81, "learning_rate": 2.8810932657086503e-05, "loss": 109.1371, "step": 5695, "task_loss": 2.847707986831665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997591437336987, "compression/movement_sparsity/importance_threshold": -1.7132342663098848e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241739318235238, "compression/movement_sparsity/model_sparsity": 0.8924257141738259, "compression_loss": 105.58199310302734, "distillation_loss": 3.7251839637756348, "epoch": 4.81, "learning_rate": 2.8806236498544193e-05, "loss": 109.5452, "step": 5696, "task_loss": 1.6468908786773682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997623985037813, "compression/movement_sparsity/importance_threshold": -1.6900827672014507e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241799058315094, "compression/movement_sparsity/model_sparsity": 0.892431482956259, "compression_loss": 105.5816650390625, "distillation_loss": 4.827065944671631, "epoch": 4.82, "learning_rate": 2.8801540340001883e-05, "loss": 109.6491, "step": 5697, "task_loss": 3.0681657791137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999765623819133, "compression/movement_sparsity/importance_threshold": -1.6671407824858306e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9240976648473249, "compression/movement_sparsity/model_sparsity": 0.892352067202884, "compression_loss": 105.58138275146484, "distillation_loss": 4.307301998138428, "epoch": 4.82, "learning_rate": 2.879684418145957e-05, "loss": 110.0123, "step": 5698, "task_loss": 2.392587661743164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999768819813639, "compression/movement_sparsity/importance_threshold": -1.6444073598241224e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241092193657641, "compression/movement_sparsity/model_sparsity": 0.892363224788069, "compression_loss": 105.58102416992188, "distillation_loss": 5.902026176452637, "epoch": 4.82, "learning_rate": 2.8792148022917252e-05, "loss": 109.8708, "step": 5699, "task_loss": 3.178363084793091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997719866211843, "compression/movement_sparsity/importance_threshold": -1.6218815468791586e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241087543232263, "compression/movement_sparsity/model_sparsity": 0.892362775721173, "compression_loss": 105.58070373535156, "distillation_loss": 3.7078473567962646, "epoch": 4.82, "learning_rate": 2.878745186437494e-05, "loss": 109.1491, "step": 5700, "task_loss": 1.729151964187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999775124375654, "compression/movement_sparsity/importance_threshold": -1.5995623913120371e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241259608971247, "compression/movement_sparsity/model_sparsity": 0.8923793911963245, "compression_loss": 105.58043670654297, "distillation_loss": 5.178895950317383, "epoch": 4.82, "learning_rate": 2.878275570583263e-05, "loss": 109.9161, "step": 5701, "task_loss": 2.5318405628204346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997782332109333, "compression/movement_sparsity/importance_threshold": -1.5774489407855904e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.924137968533934, "compression/movement_sparsity/model_sparsity": 0.8923909863338696, "compression_loss": 105.58008575439453, "distillation_loss": 4.757645606994629, "epoch": 4.82, "learning_rate": 2.877805954729032e-05, "loss": 109.9599, "step": 5702, "task_loss": 2.0743653774261475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997813132609075, "compression/movement_sparsity/importance_threshold": -1.5555402429609164e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241270698447149, "compression/movement_sparsity/model_sparsity": 0.8923804620481534, "compression_loss": 105.57978057861328, "distillation_loss": 2.612891435623169, "epoch": 4.82, "learning_rate": 2.8773363388748004e-05, "loss": 109.0473, "step": 5703, "task_loss": 1.7805851697921753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997843646594615, "compression/movement_sparsity/importance_threshold": -1.5338353454991127e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241744087902293, "compression/movement_sparsity/model_sparsity": 0.8924261747552577, "compression_loss": 105.57941436767578, "distillation_loss": 4.792078018188477, "epoch": 4.82, "learning_rate": 2.8768667230205694e-05, "loss": 110.0123, "step": 5704, "task_loss": 2.147902727127075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997873875404806, "compression/movement_sparsity/importance_threshold": -1.5123332960638794e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241322449334689, "compression/movement_sparsity/model_sparsity": 0.8923854593566882, "compression_loss": 105.57909393310547, "distillation_loss": 4.631555080413818, "epoch": 4.82, "learning_rate": 2.876397107166338e-05, "loss": 110.021, "step": 5705, "task_loss": 3.199936866760254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997903820378499, "compression/movement_sparsity/importance_threshold": -1.4910331423154469e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241245657695114, "compression/movement_sparsity/model_sparsity": 0.8923780439956365, "compression_loss": 105.57874298095703, "distillation_loss": 3.8345179557800293, "epoch": 4.82, "learning_rate": 2.875927491312107e-05, "loss": 109.7949, "step": 5706, "task_loss": 2.0894129276275635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997933482854545, "compression/movement_sparsity/importance_threshold": -1.4699339319166477e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9240593286483756, "compression/movement_sparsity/model_sparsity": 0.8923150479703046, "compression_loss": 105.57840728759766, "distillation_loss": 3.729022264480591, "epoch": 4.82, "learning_rate": 2.8754578754578753e-05, "loss": 109.3279, "step": 5707, "task_loss": 1.7982436418533325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997962864171795, "compression/movement_sparsity/importance_threshold": -1.449034712529447e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241724770750722, "compression/movement_sparsity/model_sparsity": 0.8924243094004589, "compression_loss": 105.57804107666016, "distillation_loss": 4.331898212432861, "epoch": 4.82, "learning_rate": 2.8749882596036443e-05, "loss": 109.6619, "step": 5708, "task_loss": 2.1471571922302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997991965669102, "compression/movement_sparsity/importance_threshold": -1.4283345318149426e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241650840911381, "compression/movement_sparsity/model_sparsity": 0.8924171703882664, "compression_loss": 105.57772064208984, "distillation_loss": 4.177315711975098, "epoch": 4.83, "learning_rate": 2.8745186437494132e-05, "loss": 109.5534, "step": 5709, "task_loss": 2.128593683242798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998020788685317, "compression/movement_sparsity/importance_threshold": -1.4078324374350998e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241174112689299, "compression/movement_sparsity/model_sparsity": 0.8923711352741598, "compression_loss": 105.57733917236328, "distillation_loss": 3.8352389335632324, "epoch": 4.83, "learning_rate": 2.8740490278951822e-05, "loss": 109.4765, "step": 5710, "task_loss": 2.448417901992798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999804933455929, "compression/movement_sparsity/importance_threshold": -1.3875274770518836e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241667177021041, "compression/movement_sparsity/model_sparsity": 0.8924187478796702, "compression_loss": 105.5770263671875, "distillation_loss": 4.578286170959473, "epoch": 4.83, "learning_rate": 2.8735794120409505e-05, "loss": 110.1307, "step": 5711, "task_loss": 2.5919930934906006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998077604629874, "compression/movement_sparsity/importance_threshold": -1.3674186983272593e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241238980161237, "compression/movement_sparsity/model_sparsity": 0.8923773991816321, "compression_loss": 105.57677459716797, "distillation_loss": 3.978867530822754, "epoch": 4.83, "learning_rate": 2.873109796186719e-05, "loss": 109.0709, "step": 5712, "task_loss": 2.3670854568481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998105600235919, "compression/movement_sparsity/importance_threshold": -1.3475051489223247e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241288942423632, "compression/movement_sparsity/model_sparsity": 0.89238222377213, "compression_loss": 105.5763931274414, "distillation_loss": 3.83671498298645, "epoch": 4.83, "learning_rate": 2.872640180332488e-05, "loss": 109.0667, "step": 5713, "task_loss": 3.0400702953338623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998133322716277, "compression/movement_sparsity/importance_threshold": -1.3277858765007797e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241321733884631, "compression/movement_sparsity/model_sparsity": 0.8923853902694734, "compression_loss": 105.57605743408203, "distillation_loss": 6.068419933319092, "epoch": 4.83, "learning_rate": 2.872170564478257e-05, "loss": 109.8972, "step": 5714, "task_loss": 3.8165392875671387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99981607734098, "compression/movement_sparsity/importance_threshold": -1.308259928723722e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.924144717612816, "compression/movement_sparsity/model_sparsity": 0.8923975035611292, "compression_loss": 105.57576751708984, "distillation_loss": 4.397703170776367, "epoch": 4.83, "learning_rate": 2.871700948624026e-05, "loss": 109.4394, "step": 5715, "task_loss": 2.6513664722442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999818795365534, "compression/movement_sparsity/importance_threshold": -1.2889263532513823e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241771155762826, "compression/movement_sparsity/model_sparsity": 0.8924287885548831, "compression_loss": 105.57542419433594, "distillation_loss": 4.698570728302002, "epoch": 4.83, "learning_rate": 2.8712313327697943e-05, "loss": 109.6093, "step": 5716, "task_loss": 3.370211601257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998214864791746, "compression/movement_sparsity/importance_threshold": -1.2697841977474603e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.924171797397517, "compression/movement_sparsity/model_sparsity": 0.8924236530719186, "compression_loss": 105.57508850097656, "distillation_loss": 3.789975166320801, "epoch": 4.83, "learning_rate": 2.8707617169155633e-05, "loss": 109.9782, "step": 5717, "task_loss": 2.1264123916625977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998241508157871, "compression/movement_sparsity/importance_threshold": -1.2508325098721865e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241764358987273, "compression/movement_sparsity/model_sparsity": 0.8924281322263428, "compression_loss": 105.57476043701172, "distillation_loss": 4.760428428649902, "epoch": 4.83, "learning_rate": 2.870292101061332e-05, "loss": 109.2588, "step": 5718, "task_loss": 1.994410753250122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998267885092567, "compression/movement_sparsity/importance_threshold": -1.2320703372892608e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241576911072038, "compression/movement_sparsity/model_sparsity": 0.8924100313760738, "compression_loss": 105.57445526123047, "distillation_loss": 4.596020698547363, "epoch": 4.83, "learning_rate": 2.869822485207101e-05, "loss": 108.7434, "step": 5719, "task_loss": 2.8644955158233643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998293996934683, "compression/movement_sparsity/importance_threshold": -1.2134967276606484e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241501073365874, "compression/movement_sparsity/model_sparsity": 0.8924027081313084, "compression_loss": 105.57405090332031, "distillation_loss": 4.143733978271484, "epoch": 4.83, "learning_rate": 2.8693528693528692e-05, "loss": 110.3384, "step": 5720, "task_loss": 2.615748882293701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998319845023073, "compression/movement_sparsity/importance_threshold": -1.1951107286465798e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241377181264138, "compression/movement_sparsity/model_sparsity": 0.8923907445286179, "compression_loss": 105.57373046875, "distillation_loss": 2.6124534606933594, "epoch": 4.84, "learning_rate": 2.8688832534986382e-05, "loss": 108.6016, "step": 5721, "task_loss": 1.5687048435211182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998345430696587, "compression/movement_sparsity/importance_threshold": -1.1769113879090201e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241202015241566, "compression/movement_sparsity/model_sparsity": 0.8923738296755357, "compression_loss": 105.57328796386719, "distillation_loss": 3.0981287956237793, "epoch": 4.84, "learning_rate": 2.868413637644407e-05, "loss": 109.4371, "step": 5722, "task_loss": 1.4212117195129395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998370755294077, "compression/movement_sparsity/importance_threshold": -1.158897753110802e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.924107418816451, "compression/movement_sparsity/model_sparsity": 0.8923614860931639, "compression_loss": 105.57291412353516, "distillation_loss": 3.995809555053711, "epoch": 4.84, "learning_rate": 2.8679440217901758e-05, "loss": 108.9872, "step": 5723, "task_loss": 1.8057414293289185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998395820154394, "compression/movement_sparsity/importance_threshold": -1.141068871913023e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9241656206786817, "compression/movement_sparsity/model_sparsity": 0.8924176885423771, "compression_loss": 105.57262420654297, "distillation_loss": 3.54580020904541, "epoch": 4.84, "learning_rate": 2.8674744059359444e-05, "loss": 110.0325, "step": 5724, "task_loss": 2.4581167697906494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999842062661639, "compression/movement_sparsity/importance_threshold": -1.1234237919776485e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9242057097302734, "compression/movement_sparsity/model_sparsity": 0.8924564004117184, "compression_loss": 105.57219696044922, "distillation_loss": 3.1563146114349365, "epoch": 4.84, "learning_rate": 2.867004790081713e-05, "loss": 109.1448, "step": 5725, "task_loss": 1.8296661376953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998445176018915, "compression/movement_sparsity/importance_threshold": -1.1059615609683784e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9242629695832608, "compression/movement_sparsity/model_sparsity": 0.8925116932126037, "compression_loss": 105.57185363769531, "distillation_loss": 3.3262510299682617, "epoch": 4.84, "learning_rate": 2.866535174227482e-05, "loss": 109.5568, "step": 5726, "task_loss": 1.5124826431274414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998469469700823, "compression/movement_sparsity/importance_threshold": -1.0886812265437085e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9243069459135019, "compression/movement_sparsity/model_sparsity": 0.892554158820614, "compression_loss": 105.5715103149414, "distillation_loss": 4.789102554321289, "epoch": 4.84, "learning_rate": 2.866065558373251e-05, "loss": 109.2901, "step": 5727, "task_loss": 2.6658859252929688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998493509000963, "compression/movement_sparsity/importance_threshold": -1.0715818363673385e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9242962857076354, "compression/movement_sparsity/model_sparsity": 0.8925438648256137, "compression_loss": 105.57110595703125, "distillation_loss": 4.270772933959961, "epoch": 4.84, "learning_rate": 2.8655959425190193e-05, "loss": 109.389, "step": 5728, "task_loss": 3.2612452507019043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998517295258187, "compression/movement_sparsity/importance_threshold": -1.054662438102101e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9242980266361103, "compression/movement_sparsity/model_sparsity": 0.8925455459478396, "compression_loss": 105.57064056396484, "distillation_loss": 3.7361996173858643, "epoch": 4.84, "learning_rate": 2.8651263266647883e-05, "loss": 110.2107, "step": 5729, "task_loss": 1.869370698928833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998540829811348, "compression/movement_sparsity/importance_threshold": -1.0379220794073593e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92431336111569, "compression/movement_sparsity/model_sparsity": 0.8925603536408715, "compression_loss": 105.57028198242188, "distillation_loss": 5.268033027648926, "epoch": 4.84, "learning_rate": 2.8646567108105572e-05, "loss": 109.6008, "step": 5730, "task_loss": 2.508941650390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998564113999294, "compression/movement_sparsity/importance_threshold": -1.0213598079476804e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9242553977368121, "compression/movement_sparsity/model_sparsity": 0.8925043814823742, "compression_loss": 105.56991577148438, "distillation_loss": 4.421067714691162, "epoch": 4.84, "learning_rate": 2.864187094956326e-05, "loss": 109.5741, "step": 5731, "task_loss": 2.892493963241577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998587149160879, "compression/movement_sparsity/importance_threshold": -1.004974671383295e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9243199074837221, "compression/movement_sparsity/model_sparsity": 0.8925666751210226, "compression_loss": 105.56944274902344, "distillation_loss": 5.303094863891602, "epoch": 4.84, "learning_rate": 2.863717479102095e-05, "loss": 110.3291, "step": 5732, "task_loss": 2.8851449489593506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998609936634955, "compression/movement_sparsity/importance_threshold": -9.887657173753006e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243128960731521, "compression/movement_sparsity/model_sparsity": 0.8925599045739755, "compression_loss": 105.56912231445312, "distillation_loss": 4.337189197540283, "epoch": 4.85, "learning_rate": 2.863247863247863e-05, "loss": 109.6264, "step": 5733, "task_loss": 3.063685655593872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998632477760372, "compression/movement_sparsity/importance_threshold": -9.727319935873974e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243850730598521, "compression/movement_sparsity/model_sparsity": 0.8926296020591399, "compression_loss": 105.5687255859375, "distillation_loss": 3.098179340362549, "epoch": 4.85, "learning_rate": 2.862778247393632e-05, "loss": 109.0962, "step": 5734, "task_loss": 3.1703550815582275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999865477387598, "compression/movement_sparsity/importance_threshold": -9.56872547680683e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924368081120971, "compression/movement_sparsity/model_sparsity": 0.8926131938456326, "compression_loss": 105.56829071044922, "distillation_loss": 4.947171211242676, "epoch": 4.85, "learning_rate": 2.862308631539401e-05, "loss": 110.4709, "step": 5735, "task_loss": 3.180368423461914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998676826320634, "compression/movement_sparsity/importance_threshold": -9.411864273171228e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244029712354735, "compression/movement_sparsity/model_sparsity": 0.8926468853773675, "compression_loss": 105.56787109375, "distillation_loss": 3.9902570247650146, "epoch": 4.85, "learning_rate": 2.8618390156851697e-05, "loss": 109.2275, "step": 5736, "task_loss": 1.5359686613082886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998698636433183, "compression/movement_sparsity/importance_threshold": -9.25672680156947e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243965321849501, "compression/movement_sparsity/model_sparsity": 0.8926406675280384, "compression_loss": 105.5674819946289, "distillation_loss": 3.3580570220947266, "epoch": 4.85, "learning_rate": 2.8613693998309384e-05, "loss": 109.5122, "step": 5737, "task_loss": 1.9163830280303955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998720205552478, "compression/movement_sparsity/importance_threshold": -9.103303538655905e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244176856583362, "compression/movement_sparsity/model_sparsity": 0.8926610943145379, "compression_loss": 105.5670394897461, "distillation_loss": 5.143896102905273, "epoch": 4.85, "learning_rate": 2.860899783976707e-05, "loss": 109.9472, "step": 5738, "task_loss": 3.245107650756836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998741535017373, "compression/movement_sparsity/importance_threshold": -8.951584961015488e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243772269575476, "compression/movement_sparsity/model_sparsity": 0.892622025494587, "compression_loss": 105.56665802001953, "distillation_loss": 6.234442710876465, "epoch": 4.85, "learning_rate": 2.860430168122476e-05, "loss": 109.6261, "step": 5739, "task_loss": 2.922332525253296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998762626166716, "compression/movement_sparsity/importance_threshold": -8.801561545276546e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244575958474134, "compression/movement_sparsity/model_sparsity": 0.8926996334658422, "compression_loss": 105.56625366210938, "distillation_loss": 4.054473400115967, "epoch": 4.85, "learning_rate": 2.859960552268245e-05, "loss": 109.1605, "step": 5740, "task_loss": 3.2912018299102783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999878348033936, "compression/movement_sparsity/importance_threshold": -8.653223768076077e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244444196421757, "compression/movement_sparsity/model_sparsity": 0.8926869099037893, "compression_loss": 105.5657730102539, "distillation_loss": 4.274738311767578, "epoch": 4.85, "learning_rate": 2.8594909364140132e-05, "loss": 109.798, "step": 5741, "task_loss": 2.5474166870117188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998804098874158, "compression/movement_sparsity/importance_threshold": -8.506562105999038e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244481280583106, "compression/movement_sparsity/model_sparsity": 0.8926904909244213, "compression_loss": 105.56538391113281, "distillation_loss": 3.7280690670013428, "epoch": 4.85, "learning_rate": 2.8590213205597822e-05, "loss": 109.8059, "step": 5742, "task_loss": 2.3714048862457275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998824483109959, "compression/movement_sparsity/importance_threshold": -8.361567035691103e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244481519066458, "compression/movement_sparsity/model_sparsity": 0.892690513953493, "compression_loss": 105.56491088867188, "distillation_loss": 4.427037239074707, "epoch": 4.85, "learning_rate": 2.858551704705551e-05, "loss": 110.0798, "step": 5743, "task_loss": 1.7987884283065796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998844634385615, "compression/movement_sparsity/importance_threshold": -8.21822903376325e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244408781643879, "compression/movement_sparsity/model_sparsity": 0.8926834900866583, "compression_loss": 105.56448364257812, "distillation_loss": 5.86729097366333, "epoch": 4.85, "learning_rate": 2.8580820888513198e-05, "loss": 109.4011, "step": 5744, "task_loss": 3.5226378440856934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998864554039979, "compression/movement_sparsity/importance_threshold": -8.076538576817782e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245142475678514, "compression/movement_sparsity/model_sparsity": 0.8927543390254021, "compression_loss": 105.56401062011719, "distillation_loss": 2.304150104522705, "epoch": 4.86, "learning_rate": 2.8576124729970888e-05, "loss": 108.569, "step": 5745, "task_loss": 1.7232531309127808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99988842434119, "compression/movement_sparsity/importance_threshold": -7.936486141500373e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244728945544902, "compression/movement_sparsity/model_sparsity": 0.8927144066152666, "compression_loss": 105.56350708007812, "distillation_loss": 4.413586616516113, "epoch": 4.86, "learning_rate": 2.857142857142857e-05, "loss": 109.2409, "step": 5746, "task_loss": 3.613898992538452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998903703840231, "compression/movement_sparsity/importance_threshold": -7.798062204413328e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244700804509282, "compression/movement_sparsity/model_sparsity": 0.8927116891848191, "compression_loss": 105.56303405761719, "distillation_loss": 5.023900032043457, "epoch": 4.86, "learning_rate": 2.856673241288626e-05, "loss": 109.7119, "step": 5747, "task_loss": 1.549482822418213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998922936663822, "compression/movement_sparsity/importance_threshold": -7.661257242184971e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924491400862661, "compression/movement_sparsity/model_sparsity": 0.8927322771748197, "compression_loss": 105.5625228881836, "distillation_loss": 4.03770637512207, "epoch": 4.86, "learning_rate": 2.856203625434395e-05, "loss": 109.4771, "step": 5748, "task_loss": 3.7496254444122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998941943221527, "compression/movement_sparsity/importance_threshold": -7.526061731417608e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245036946794937, "compression/movement_sparsity/model_sparsity": 0.8927441486612239, "compression_loss": 105.56202697753906, "distillation_loss": 3.7372589111328125, "epoch": 4.86, "learning_rate": 2.8557340095801637e-05, "loss": 109.5751, "step": 5749, "task_loss": 2.988196849822998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998960724852195, "compression/movement_sparsity/importance_threshold": -7.392466148739563e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245691225873116, "compression/movement_sparsity/model_sparsity": 0.8928073289191286, "compression_loss": 105.56155395507812, "distillation_loss": 4.939544200897217, "epoch": 4.86, "learning_rate": 2.855264393725932e-05, "loss": 109.4135, "step": 5750, "task_loss": 2.717559814453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998979282894678, "compression/movement_sparsity/importance_threshold": -7.260460970770488e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246001015748296, "compression/movement_sparsity/model_sparsity": 0.8928372436831228, "compression_loss": 105.56111907958984, "distillation_loss": 4.415658473968506, "epoch": 4.86, "learning_rate": 2.854794777871701e-05, "loss": 109.7934, "step": 5751, "task_loss": 2.511021137237549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998997618687827, "compression/movement_sparsity/importance_threshold": -7.130036674138709e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245906337857267, "compression/movement_sparsity/model_sparsity": 0.892828101141702, "compression_loss": 105.56060028076172, "distillation_loss": 4.405801773071289, "epoch": 4.86, "learning_rate": 2.85432516201747e-05, "loss": 110.0658, "step": 5752, "task_loss": 2.562784194946289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999015733570495, "compression/movement_sparsity/importance_threshold": -7.001183735429184e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924558402760607, "compression/movement_sparsity/model_sparsity": 0.8927969773514493, "compression_loss": 105.56018829345703, "distillation_loss": 5.576191425323486, "epoch": 4.86, "learning_rate": 2.853855546163239e-05, "loss": 109.5265, "step": 5753, "task_loss": 2.152772903442383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999033628881532, "compression/movement_sparsity/importance_threshold": -6.873892631304931e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245998273189739, "compression/movement_sparsity/model_sparsity": 0.8928369788487995, "compression_loss": 105.55976104736328, "distillation_loss": 4.337490081787109, "epoch": 4.86, "learning_rate": 2.8533859303090072e-05, "loss": 109.2975, "step": 5754, "task_loss": 2.147925615310669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999905130595979, "compression/movement_sparsity/importance_threshold": -6.748153838350909e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246168073336875, "compression/movement_sparsity/model_sparsity": 0.8928533755477709, "compression_loss": 105.55928802490234, "distillation_loss": 3.4317445755004883, "epoch": 4.86, "learning_rate": 2.852916314454776e-05, "loss": 109.6607, "step": 5755, "task_loss": 2.6321327686309814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999906876614412, "compression/movement_sparsity/importance_threshold": -6.623957833195443e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246341808459329, "compression/movement_sparsity/model_sparsity": 0.8928701522264236, "compression_loss": 105.5588607788086, "distillation_loss": 3.4477553367614746, "epoch": 4.87, "learning_rate": 2.8524466986005448e-05, "loss": 109.2549, "step": 5756, "task_loss": 1.7002716064453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999086010773374, "compression/movement_sparsity/importance_threshold": -6.501295092466858e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924570613108266, "compression/movement_sparsity/model_sparsity": 0.8928087682361029, "compression_loss": 105.55839538574219, "distillation_loss": 3.5324783325195312, "epoch": 4.87, "learning_rate": 2.8519770827463138e-05, "loss": 109.5099, "step": 5757, "task_loss": 1.8845869302749634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999103041186402, "compression/movement_sparsity/importance_threshold": -6.380156092767458e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245828592284281, "compression/movement_sparsity/model_sparsity": 0.8928205936643638, "compression_loss": 105.55802154541016, "distillation_loss": 4.977496147155762, "epoch": 4.87, "learning_rate": 2.851507466892082e-05, "loss": 110.0716, "step": 5758, "task_loss": 4.235260009765625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999119858722058, "compression/movement_sparsity/importance_threshold": -6.260531310734244e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245720082358795, "compression/movement_sparsity/model_sparsity": 0.8928101154367909, "compression_loss": 105.55754089355469, "distillation_loss": 3.980117082595825, "epoch": 4.87, "learning_rate": 2.851037851037851e-05, "loss": 110.2358, "step": 5759, "task_loss": 3.1079046726226807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999913646471919, "compression/movement_sparsity/importance_threshold": -6.142411222960845e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245359376287811, "compression/movement_sparsity/model_sparsity": 0.8927752839660124, "compression_loss": 105.55715942382812, "distillation_loss": 4.594130039215088, "epoch": 4.87, "learning_rate": 2.85056823518362e-05, "loss": 109.5565, "step": 5760, "task_loss": 1.810420036315918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999152860516652, "compression/movement_sparsity/importance_threshold": -6.025786306092934e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245069976739287, "compression/movement_sparsity/model_sparsity": 0.892747338187639, "compression_loss": 105.55667114257812, "distillation_loss": 5.56600284576416, "epoch": 4.87, "learning_rate": 2.850098619329389e-05, "loss": 110.8511, "step": 5761, "task_loss": 3.173851490020752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999169047453296, "compression/movement_sparsity/importance_threshold": -5.910647036732816e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924500308215885, "compression/movement_sparsity/model_sparsity": 0.8927408785330583, "compression_loss": 105.55622863769531, "distillation_loss": 3.7308735847473145, "epoch": 4.87, "learning_rate": 2.8496290034751576e-05, "loss": 109.5291, "step": 5762, "task_loss": 1.3807402849197388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999918502686797, "compression/movement_sparsity/importance_threshold": -5.796983891500143e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244933087294828, "compression/movement_sparsity/model_sparsity": 0.8927341195005469, "compression_loss": 105.55574035644531, "distillation_loss": 3.7910900115966797, "epoch": 4.87, "learning_rate": 2.849159387620926e-05, "loss": 109.6727, "step": 5763, "task_loss": 1.8973743915557861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999200800099528, "compression/movement_sparsity/importance_threshold": -5.68478734702324e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245176221072923, "compression/movement_sparsity/model_sparsity": 0.892757597639032, "compression_loss": 105.55524444580078, "distillation_loss": 3.20263671875, "epoch": 4.87, "learning_rate": 2.848689771766695e-05, "loss": 109.0467, "step": 5764, "task_loss": 1.6284337043762207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999216368486822, "compression/movement_sparsity/importance_threshold": -5.574047879895738e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246091997147358, "compression/movement_sparsity/model_sparsity": 0.892846029273934, "compression_loss": 105.5547866821289, "distillation_loss": 3.465578079223633, "epoch": 4.87, "learning_rate": 2.848220155912464e-05, "loss": 109.8939, "step": 5765, "task_loss": 2.4563357830047607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999231733368701, "compression/movement_sparsity/importance_threshold": -5.464755966771984e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245903356815358, "compression/movement_sparsity/model_sparsity": 0.8928278132783071, "compression_loss": 105.55426025390625, "distillation_loss": 4.692588806152344, "epoch": 4.87, "learning_rate": 2.8477505400582328e-05, "loss": 109.9541, "step": 5766, "task_loss": 2.3017096519470215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999246896084018, "compression/movement_sparsity/importance_threshold": -5.356902084245607e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245859595120135, "compression/movement_sparsity/model_sparsity": 0.8928235874436704, "compression_loss": 105.55378723144531, "distillation_loss": 4.690952777862549, "epoch": 4.87, "learning_rate": 2.847280924204001e-05, "loss": 109.4363, "step": 5767, "task_loss": 2.367199182510376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999261857971624, "compression/movement_sparsity/importance_threshold": -5.250476708936261e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246204322806487, "compression/movement_sparsity/model_sparsity": 0.8928568759666525, "compression_loss": 105.55327606201172, "distillation_loss": 4.960714340209961, "epoch": 4.88, "learning_rate": 2.84681130834977e-05, "loss": 110.098, "step": 5768, "task_loss": 3.194319725036621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999276620370371, "compression/movement_sparsity/importance_threshold": -5.14547031747227e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245599767507349, "compression/movement_sparsity/model_sparsity": 0.8927984972701741, "compression_loss": 105.55278778076172, "distillation_loss": 4.005005359649658, "epoch": 4.88, "learning_rate": 2.8463416924955387e-05, "loss": 109.5823, "step": 5769, "task_loss": 2.4872078895568848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999291184619109, "compression/movement_sparsity/importance_threshold": -5.041873386473286e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245582954430982, "compression/movement_sparsity/model_sparsity": 0.8927968737206271, "compression_loss": 105.55229187011719, "distillation_loss": 3.9163007736206055, "epoch": 4.88, "learning_rate": 2.8458720766413077e-05, "loss": 109.0882, "step": 5770, "task_loss": 1.4560729265213013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999305552056691, "compression/movement_sparsity/importance_threshold": -4.939676392550288e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245771714004658, "compression/movement_sparsity/model_sparsity": 0.8928151012307899, "compression_loss": 105.55183410644531, "distillation_loss": 3.553004741668701, "epoch": 4.88, "learning_rate": 2.845402460787076e-05, "loss": 109.545, "step": 5771, "task_loss": 1.9791879653930664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999319724021967, "compression/movement_sparsity/importance_threshold": -4.838869812322927e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245747150219329, "compression/movement_sparsity/model_sparsity": 0.8928127292364162, "compression_loss": 105.55136108398438, "distillation_loss": 3.2811408042907715, "epoch": 4.88, "learning_rate": 2.844932844932845e-05, "loss": 109.6898, "step": 5772, "task_loss": 2.0989317893981934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999933370185379, "compression/movement_sparsity/importance_threshold": -4.739444122402181e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246079238287988, "compression/movement_sparsity/model_sparsity": 0.892844797218604, "compression_loss": 105.55088806152344, "distillation_loss": 4.348297119140625, "epoch": 4.88, "learning_rate": 2.844463229078614e-05, "loss": 109.6767, "step": 5773, "task_loss": 2.119016408920288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999347486891009, "compression/movement_sparsity/importance_threshold": -4.6413897994250497e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9245926966667277, "compression/movement_sparsity/model_sparsity": 0.8928300931563944, "compression_loss": 105.55043029785156, "distillation_loss": 4.1113972663879395, "epoch": 4.88, "learning_rate": 2.843993613224383e-05, "loss": 109.1276, "step": 5774, "task_loss": 1.8244227170944214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999361080472479, "compression/movement_sparsity/importance_threshold": -4.5446973199938373e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246298166005782, "compression/movement_sparsity/model_sparsity": 0.8928659379063227, "compression_loss": 105.55003356933594, "distillation_loss": 3.1407675743103027, "epoch": 4.88, "learning_rate": 2.8435239973701515e-05, "loss": 109.8815, "step": 5775, "task_loss": 1.3444616794586182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999374483937048, "compression/movement_sparsity/importance_threshold": -4.4493571607368693e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246757604184791, "compression/movement_sparsity/model_sparsity": 0.8929103034127391, "compression_loss": 105.54955291748047, "distillation_loss": 3.4795708656311035, "epoch": 4.88, "learning_rate": 2.84305438151592e-05, "loss": 109.8239, "step": 5776, "task_loss": 2.5741770267486572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999387698623569, "compression/movement_sparsity/importance_threshold": -4.3553597982737974e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246998114646007, "compression/movement_sparsity/model_sparsity": 0.8929335282314367, "compression_loss": 105.54911804199219, "distillation_loss": 2.359997510910034, "epoch": 4.88, "learning_rate": 2.8425847656616888e-05, "loss": 108.8979, "step": 5777, "task_loss": 2.325786590576172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999400725870893, "compression/movement_sparsity/importance_threshold": -4.2626957092156e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247032813973827, "compression/movement_sparsity/model_sparsity": 0.892936878961353, "compression_loss": 105.54862976074219, "distillation_loss": 4.322570323944092, "epoch": 4.88, "learning_rate": 2.8421151498074578e-05, "loss": 109.9081, "step": 5778, "task_loss": 2.926050901412964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999413567017872, "compression/movement_sparsity/importance_threshold": -4.171355370173255e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246599966688646, "compression/movement_sparsity/model_sparsity": 0.8928950811964188, "compression_loss": 105.54817962646484, "distillation_loss": 3.256303548812866, "epoch": 4.88, "learning_rate": 2.8416455339532268e-05, "loss": 108.8037, "step": 5779, "task_loss": 1.4174340963363647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999426223403356, "compression/movement_sparsity/importance_threshold": -4.081329257792435e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9246923946323312, "compression/movement_sparsity/model_sparsity": 0.8929263661901725, "compression_loss": 105.54768371582031, "distillation_loss": 5.028491497039795, "epoch": 4.89, "learning_rate": 2.841175918098995e-05, "loss": 109.6316, "step": 5780, "task_loss": 3.056548833847046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999438696366199, "compression/movement_sparsity/importance_threshold": -3.9926078486580974e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247173042185225, "compression/movement_sparsity/model_sparsity": 0.8929504200554473, "compression_loss": 105.54718017578125, "distillation_loss": 3.3077354431152344, "epoch": 4.89, "learning_rate": 2.840706302244764e-05, "loss": 109.5583, "step": 5781, "task_loss": 1.9038091897964478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999450987245249, "compression/movement_sparsity/importance_threshold": -3.905181619415915e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.92472498338248, "compression/movement_sparsity/model_sparsity": 0.892957835416499, "compression_loss": 105.54666900634766, "distillation_loss": 5.022045135498047, "epoch": 4.89, "learning_rate": 2.8402366863905327e-05, "loss": 110.3303, "step": 5782, "task_loss": 1.9528627395629883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999946309737936, "compression/movement_sparsity/importance_threshold": -3.8190410466681923e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247211318763336, "compression/movement_sparsity/model_sparsity": 0.8929541162214374, "compression_loss": 105.5461654663086, "distillation_loss": 4.307309150695801, "epoch": 4.89, "learning_rate": 2.8397670705363016e-05, "loss": 109.5898, "step": 5783, "task_loss": 2.463567018508911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999475028107382, "compression/movement_sparsity/importance_threshold": -3.7341766070432547e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247911982853619, "compression/movement_sparsity/model_sparsity": 0.8930217756337661, "compression_loss": 105.54570007324219, "distillation_loss": 3.5218613147735596, "epoch": 4.89, "learning_rate": 2.83929745468207e-05, "loss": 109.0387, "step": 5784, "task_loss": 2.4120700359344482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999486780768168, "compression/movement_sparsity/importance_threshold": -3.650578777160754e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247698182527908, "compression/movement_sparsity/model_sparsity": 0.8930011300710865, "compression_loss": 105.5451889038086, "distillation_loss": 5.07768440246582, "epoch": 4.89, "learning_rate": 2.838827838827839e-05, "loss": 109.1705, "step": 5785, "task_loss": 3.068242073059082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999498356700568, "compression/movement_sparsity/importance_threshold": -3.5682380336229946e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924838382216697, "compression/movement_sparsity/model_sparsity": 0.8930673386519051, "compression_loss": 105.54472351074219, "distillation_loss": 5.087841033935547, "epoch": 4.89, "learning_rate": 2.838358222973608e-05, "loss": 109.9642, "step": 5786, "task_loss": 3.6004881858825684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999509757243433, "compression/movement_sparsity/importance_threshold": -3.487144853058302e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9248081783000754, "compression/movement_sparsity/model_sparsity": 0.8930381723327375, "compression_loss": 105.54426574707031, "distillation_loss": 5.612716197967529, "epoch": 4.89, "learning_rate": 2.8378886071193765e-05, "loss": 109.3847, "step": 5787, "task_loss": 3.3475773334503174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999520983735616, "compression/movement_sparsity/importance_threshold": -3.407289712095002e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9248035994197032, "compression/movement_sparsity/model_sparsity": 0.8930337507509924, "compression_loss": 105.54375457763672, "distillation_loss": 4.73354434967041, "epoch": 4.89, "learning_rate": 2.837418991265145e-05, "loss": 109.4836, "step": 5788, "task_loss": 2.167393922805786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999532037515967, "compression/movement_sparsity/importance_threshold": -3.328663087344072e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247820166762823, "compression/movement_sparsity/model_sparsity": 0.8930129094412043, "compression_loss": 105.54324340820312, "distillation_loss": 4.397701740264893, "epoch": 4.89, "learning_rate": 2.8369493754109138e-05, "loss": 109.7142, "step": 5789, "task_loss": 1.3870161771774292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999542919923339, "compression/movement_sparsity/importance_threshold": -3.251255455416491e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247846638414975, "compression/movement_sparsity/model_sparsity": 0.8930154656681507, "compression_loss": 105.54273223876953, "distillation_loss": 4.607369422912598, "epoch": 4.89, "learning_rate": 2.8364797595566827e-05, "loss": 109.6297, "step": 5790, "task_loss": 3.480306386947632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999553632296582, "compression/movement_sparsity/importance_threshold": -3.1750572929405835e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247265096759374, "compression/movement_sparsity/model_sparsity": 0.8929593092770807, "compression_loss": 105.54224395751953, "distillation_loss": 5.068426132202148, "epoch": 4.89, "learning_rate": 2.8360101437024517e-05, "loss": 109.556, "step": 5791, "task_loss": 3.039989471435547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999564175974547, "compression/movement_sparsity/importance_threshold": -3.100059076536002e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247476631493234, "compression/movement_sparsity/model_sparsity": 0.8929797360635803, "compression_loss": 105.5417251586914, "distillation_loss": 5.764354228973389, "epoch": 4.9, "learning_rate": 2.8355405278482207e-05, "loss": 110.5851, "step": 5792, "task_loss": 2.9764177799224854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999574552296088, "compression/movement_sparsity/importance_threshold": -3.026251282805051e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9247991755535103, "compression/movement_sparsity/model_sparsity": 0.8930294788582126, "compression_loss": 105.5412368774414, "distillation_loss": 3.549630641937256, "epoch": 4.9, "learning_rate": 2.835070911993989e-05, "loss": 109.3827, "step": 5793, "task_loss": 2.2999072074890137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999584762600052, "compression/movement_sparsity/importance_threshold": -2.9536243883934027e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9248161436440562, "compression/movement_sparsity/model_sparsity": 0.8930458640426483, "compression_loss": 105.5407943725586, "distillation_loss": 2.7637240886688232, "epoch": 4.9, "learning_rate": 2.8346012961397576e-05, "loss": 109.4474, "step": 5794, "task_loss": 2.1858761310577393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999594808225295, "compression/movement_sparsity/importance_threshold": -2.882168869886015e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9248407551260566, "compression/movement_sparsity/model_sparsity": 0.8930696300445282, "compression_loss": 105.54032897949219, "distillation_loss": 5.350521564483643, "epoch": 4.9, "learning_rate": 2.8341316802855266e-05, "loss": 110.1825, "step": 5795, "task_loss": 2.882997989654541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999604690510666, "compression/movement_sparsity/importance_threshold": -2.81187520392856e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9248823585469381, "compression/movement_sparsity/model_sparsity": 0.8931098042599154, "compression_loss": 105.53984069824219, "distillation_loss": 3.927022695541382, "epoch": 4.9, "learning_rate": 2.8336620644312956e-05, "loss": 109.2431, "step": 5796, "task_loss": 2.0995266437530518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999614410795017, "compression/movement_sparsity/importance_threshold": -2.742733867132016e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9249432671952221, "compression/movement_sparsity/model_sparsity": 0.8931686205087539, "compression_loss": 105.53934478759766, "distillation_loss": 4.378576755523682, "epoch": 4.9, "learning_rate": 2.833192448577064e-05, "loss": 108.9955, "step": 5797, "task_loss": 1.8007640838623047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99996239704172, "compression/movement_sparsity/importance_threshold": -2.6747353361073617e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9249537366144064, "compression/movement_sparsity/model_sparsity": 0.8931787302711814, "compression_loss": 105.5389175415039, "distillation_loss": 4.3032307624816895, "epoch": 4.9, "learning_rate": 2.8327228327228328e-05, "loss": 109.6754, "step": 5798, "task_loss": 2.7622671127319336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999633370716063, "compression/movement_sparsity/importance_threshold": -2.6078700874915955e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250336762342372, "compression/movement_sparsity/model_sparsity": 0.8932559237191481, "compression_loss": 105.53841400146484, "distillation_loss": 4.2878851890563965, "epoch": 4.9, "learning_rate": 2.8322532168686018e-05, "loss": 109.7774, "step": 5799, "task_loss": 2.4089720249176025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999642613030463, "compression/movement_sparsity/importance_threshold": -2.5421285978870223e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250812178906014, "compression/movement_sparsity/model_sparsity": 0.8933018321733609, "compression_loss": 105.5379638671875, "distillation_loss": 3.5996570587158203, "epoch": 4.9, "learning_rate": 2.8317836010143704e-05, "loss": 109.0766, "step": 5800, "task_loss": 1.6269128322601318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999651698699247, "compression/movement_sparsity/importance_threshold": -2.4775013439132937e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250754227451303, "compression/movement_sparsity/model_sparsity": 0.8932962361089648, "compression_loss": 105.53743743896484, "distillation_loss": 4.595160484313965, "epoch": 4.9, "learning_rate": 2.831313985160139e-05, "loss": 109.2549, "step": 5801, "task_loss": 3.3479394912719727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999660629061268, "compression/movement_sparsity/importance_threshold": -2.4139788021900616e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250998434404486, "compression/movement_sparsity/model_sparsity": 0.893319817878272, "compression_loss": 105.53693389892578, "distillation_loss": 2.8538460731506348, "epoch": 4.9, "learning_rate": 2.8308443693059077e-05, "loss": 109.7438, "step": 5802, "task_loss": 2.681487798690796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999669405455378, "compression/movement_sparsity/importance_threshold": -2.351551449328304e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.925091448826433, "compression/movement_sparsity/model_sparsity": 0.8933117116450726, "compression_loss": 105.5364990234375, "distillation_loss": 4.088194847106934, "epoch": 4.9, "learning_rate": 2.8303747534516767e-05, "loss": 109.1558, "step": 5803, "task_loss": 2.6275651454925537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999678029220427, "compression/movement_sparsity/importance_threshold": -2.2902097619736939e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250718216465044, "compression/movement_sparsity/model_sparsity": 0.8932927587191548, "compression_loss": 105.53597259521484, "distillation_loss": 3.3701605796813965, "epoch": 4.91, "learning_rate": 2.8299051375974456e-05, "loss": 109.7896, "step": 5804, "task_loss": 2.6182374954223633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999686501695267, "compression/movement_sparsity/importance_threshold": -2.2299442167111883e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250502389030835, "compression/movement_sparsity/model_sparsity": 0.8932719174093667, "compression_loss": 105.53550720214844, "distillation_loss": 6.778076171875, "epoch": 4.91, "learning_rate": 2.829435521743214e-05, "loss": 109.8818, "step": 5805, "task_loss": 3.3612723350524902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999969482421875, "compression/movement_sparsity/importance_threshold": -2.17074529018646e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250407114931424, "compression/movement_sparsity/model_sparsity": 0.8932627172952669, "compression_loss": 105.53498840332031, "distillation_loss": 3.39520263671875, "epoch": 4.91, "learning_rate": 2.828965905888983e-05, "loss": 109.3527, "step": 5806, "task_loss": 2.706174373626709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999702998129727, "compression/movement_sparsity/importance_threshold": -2.1126034590018133e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250319591540976, "compression/movement_sparsity/model_sparsity": 0.8932542656259936, "compression_loss": 105.53445434570312, "distillation_loss": 3.8467159271240234, "epoch": 4.91, "learning_rate": 2.8284962900347516e-05, "loss": 109.1021, "step": 5807, "task_loss": 1.9867414236068726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999711024767048, "compression/movement_sparsity/importance_threshold": -2.055509199785574e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250020414174993, "compression/movement_sparsity/model_sparsity": 0.8932253756556852, "compression_loss": 105.53394317626953, "distillation_loss": 3.500413417816162, "epoch": 4.91, "learning_rate": 2.8280266741805205e-05, "loss": 109.6549, "step": 5808, "task_loss": 1.3621433973312378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999718905469568, "compression/movement_sparsity/importance_threshold": -1.9994529891400464e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250291092780327, "compression/movement_sparsity/model_sparsity": 0.8932515136519388, "compression_loss": 105.53341674804688, "distillation_loss": 4.718701362609863, "epoch": 4.91, "learning_rate": 2.8275570583262895e-05, "loss": 109.7971, "step": 5809, "task_loss": 2.840993881225586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999726641576134, "compression/movement_sparsity/importance_threshold": -1.9444253037022297e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250600047963772, "compression/movement_sparsity/model_sparsity": 0.8932813478141824, "compression_loss": 105.5328369140625, "distillation_loss": 3.9062957763671875, "epoch": 4.91, "learning_rate": 2.8270874424720578e-05, "loss": 109.6021, "step": 5810, "task_loss": 2.56333327293396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999734234425601, "compression/movement_sparsity/importance_threshold": -1.8904166200744282e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250660980460391, "compression/movement_sparsity/model_sparsity": 0.8932872317419734, "compression_loss": 105.53223419189453, "distillation_loss": 3.236457586288452, "epoch": 4.91, "learning_rate": 2.8266178266178268e-05, "loss": 109.5463, "step": 5811, "task_loss": 2.1144726276397705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999741685356818, "compression/movement_sparsity/importance_threshold": -1.8374174149023148e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250375396645513, "compression/movement_sparsity/model_sparsity": 0.8932596544287456, "compression_loss": 105.53169250488281, "distillation_loss": 2.410865306854248, "epoch": 4.91, "learning_rate": 2.8261482107635957e-05, "loss": 108.9849, "step": 5812, "task_loss": 1.0472378730773926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999748995708638, "compression/movement_sparsity/importance_threshold": -1.785418164762173e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250751127167718, "compression/movement_sparsity/model_sparsity": 0.8932959367310341, "compression_loss": 105.53109741210938, "distillation_loss": 3.189753532409668, "epoch": 4.91, "learning_rate": 2.8256785949093644e-05, "loss": 108.9151, "step": 5813, "task_loss": 2.396939992904663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999756166819911, "compression/movement_sparsity/importance_threshold": -1.734409346317023e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251163822609596, "compression/movement_sparsity/model_sparsity": 0.893335788539419, "compression_loss": 105.53056335449219, "distillation_loss": 4.911005020141602, "epoch": 4.91, "learning_rate": 2.8252089790551327e-05, "loss": 109.2708, "step": 5814, "task_loss": 2.8397839069366455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999976320002949, "compression/movement_sparsity/importance_threshold": -1.6843814361518217e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250513478506736, "compression/movement_sparsity/model_sparsity": 0.8932729882611956, "compression_loss": 105.52996826171875, "distillation_loss": 3.358651876449585, "epoch": 4.91, "learning_rate": 2.8247393632009016e-05, "loss": 109.2298, "step": 5815, "task_loss": 1.7787202596664429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999770096676225, "compression/movement_sparsity/importance_threshold": -1.6353249109035684e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251248603441486, "compression/movement_sparsity/model_sparsity": 0.8933439753743689, "compression_loss": 105.52935028076172, "distillation_loss": 5.158148765563965, "epoch": 4.92, "learning_rate": 2.8242697473466706e-05, "loss": 109.7431, "step": 5816, "task_loss": 2.7166190147399902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999776858098969, "compression/movement_sparsity/importance_threshold": -1.587230247183241e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251456680166732, "compression/movement_sparsity/model_sparsity": 0.8933640682393305, "compression_loss": 105.52885437011719, "distillation_loss": 4.191144943237305, "epoch": 4.92, "learning_rate": 2.8238001314924396e-05, "loss": 109.22, "step": 5817, "task_loss": 2.330824375152588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999783485636571, "compression/movement_sparsity/importance_threshold": -1.5400879216104918e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251306077929491, "compression/movement_sparsity/model_sparsity": 0.893349525380622, "compression_loss": 105.52825164794922, "distillation_loss": 4.859598159790039, "epoch": 4.92, "learning_rate": 2.823330515638208e-05, "loss": 109.7416, "step": 5818, "task_loss": 2.0297975540161133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999789980627884, "compression/movement_sparsity/importance_threshold": -1.4938884108049721e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.925161360221282, "compression/movement_sparsity/model_sparsity": 0.8933792213684361, "compression_loss": 105.52772521972656, "distillation_loss": 4.228296279907227, "epoch": 4.92, "learning_rate": 2.822860899783977e-05, "loss": 109.8544, "step": 5819, "task_loss": 2.2615246772766113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999979634441176, "compression/movement_sparsity/importance_threshold": -1.448622191386334e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9250966119910193, "compression/movement_sparsity/model_sparsity": 0.8933166974390717, "compression_loss": 105.52718353271484, "distillation_loss": 4.276165962219238, "epoch": 4.92, "learning_rate": 2.8223912839297455e-05, "loss": 109.808, "step": 5820, "task_loss": 3.1779184341430664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999802578327048, "compression/movement_sparsity/importance_threshold": -1.404279739974229e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251025383023344, "compression/movement_sparsity/model_sparsity": 0.8933224201633616, "compression_loss": 105.52667236328125, "distillation_loss": 4.539895057678223, "epoch": 4.92, "learning_rate": 2.8219216680755145e-05, "loss": 109.3451, "step": 5821, "task_loss": 3.243807315826416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999808683712603, "compression/movement_sparsity/importance_threshold": -1.3608515331796356e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251212830938579, "compression/movement_sparsity/model_sparsity": 0.8933405210136306, "compression_loss": 105.5261001586914, "distillation_loss": 3.8887295722961426, "epoch": 4.92, "learning_rate": 2.8214520522212834e-05, "loss": 109.5711, "step": 5822, "task_loss": 2.5705041885375977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999814661907274, "compression/movement_sparsity/importance_threshold": -1.3183280476222053e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251123638164662, "compression/movement_sparsity/model_sparsity": 0.8933319081408563, "compression_loss": 105.52556610107422, "distillation_loss": 4.185876846313477, "epoch": 4.92, "learning_rate": 2.8209824363670517e-05, "loss": 109.1747, "step": 5823, "task_loss": 3.3275089263916016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999820514249912, "compression/movement_sparsity/importance_threshold": -1.2766997599302637e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251092993053839, "compression/movement_sparsity/model_sparsity": 0.893328948905157, "compression_loss": 105.52507019042969, "distillation_loss": 4.199438095092773, "epoch": 4.92, "learning_rate": 2.8205128205128207e-05, "loss": 109.3085, "step": 5824, "task_loss": 2.560515880584717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999826242079369, "compression/movement_sparsity/importance_threshold": -1.2359571467234626e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251066521401687, "compression/movement_sparsity/model_sparsity": 0.8933263926782107, "compression_loss": 105.52445983886719, "distillation_loss": 4.823539733886719, "epoch": 4.92, "learning_rate": 2.8200432046585897e-05, "loss": 109.9195, "step": 5825, "task_loss": 2.563147783279419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999831846734497, "compression/movement_sparsity/importance_threshold": -1.1960906846041064e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251852085565537, "compression/movement_sparsity/model_sparsity": 0.8934022504400251, "compression_loss": 105.52397155761719, "distillation_loss": 5.0947794914245605, "epoch": 4.92, "learning_rate": 2.8195735888043583e-05, "loss": 109.683, "step": 5826, "task_loss": 3.126951217651367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999837329554148, "compression/movement_sparsity/importance_threshold": -1.1570908502005206e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251890839110354, "compression/movement_sparsity/model_sparsity": 0.8934059926641583, "compression_loss": 105.5234146118164, "distillation_loss": 3.508413314819336, "epoch": 4.93, "learning_rate": 2.8191039729501266e-05, "loss": 109.1214, "step": 5827, "task_loss": 1.992268443107605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999842691877171, "compression/movement_sparsity/importance_threshold": -1.118948120132357e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9251720919721543, "compression/movement_sparsity/model_sparsity": 0.8933895844506511, "compression_loss": 105.52288055419922, "distillation_loss": 3.294856548309326, "epoch": 4.93, "learning_rate": 2.8186343570958956e-05, "loss": 109.4121, "step": 5828, "task_loss": 1.3759175539016724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999984793504242, "compression/movement_sparsity/importance_threshold": -1.0816529710105938e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9252472976974336, "compression/movement_sparsity/model_sparsity": 0.8934622066279073, "compression_loss": 105.52239227294922, "distillation_loss": 3.825864315032959, "epoch": 4.93, "learning_rate": 2.8181647412416645e-05, "loss": 109.4923, "step": 5829, "task_loss": 2.453969717025757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999853060388745, "compression/movement_sparsity/importance_threshold": -1.0451958794635563e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9252526278003669, "compression/movement_sparsity/model_sparsity": 0.8934673536254074, "compression_loss": 105.5218276977539, "distillation_loss": 4.402005195617676, "epoch": 4.93, "learning_rate": 2.8176951253874335e-05, "loss": 109.6857, "step": 5830, "task_loss": 2.794067144393921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999858069254998, "compression/movement_sparsity/importance_threshold": -1.0095673221022228e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9252375079558046, "compression/movement_sparsity/model_sparsity": 0.89345275319402, "compression_loss": 105.52131652832031, "distillation_loss": 6.9331865310668945, "epoch": 4.93, "learning_rate": 2.8172255095332018e-05, "loss": 109.9758, "step": 5831, "task_loss": 3.373030424118042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999862962980028, "compression/movement_sparsity/importance_threshold": -9.747577755549186e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252161636957364, "compression/movement_sparsity/model_sparsity": 0.8934321421749477, "compression_loss": 105.52079010009766, "distillation_loss": 3.5920162200927734, "epoch": 4.93, "learning_rate": 2.8167558936789708e-05, "loss": 109.1026, "step": 5832, "task_loss": 2.2727773189544678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999986774290269, "compression/movement_sparsity/importance_threshold": -9.40757716432622e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252544045013447, "compression/movement_sparsity/model_sparsity": 0.8934690692912408, "compression_loss": 105.52018737792969, "distillation_loss": 2.572540760040283, "epoch": 4.93, "learning_rate": 2.8162862778247394e-05, "loss": 109.3786, "step": 5833, "task_loss": 1.3933608531951904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999872410361835, "compression/movement_sparsity/importance_threshold": -9.07557621346311e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252032855946897, "compression/movement_sparsity/model_sparsity": 0.8934197064762897, "compression_loss": 105.5196762084961, "distillation_loss": 4.8634538650512695, "epoch": 4.93, "learning_rate": 2.8158166619705084e-05, "loss": 109.4659, "step": 5834, "task_loss": 2.2804956436157227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999876966696312, "compression/movement_sparsity/importance_threshold": -8.751479669329848e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251546111424002, "compression/movement_sparsity/model_sparsity": 0.8933727041411763, "compression_loss": 105.51919555664062, "distillation_loss": 3.567415475845337, "epoch": 4.93, "learning_rate": 2.8153470461162767e-05, "loss": 109.6984, "step": 5835, "task_loss": 1.989652156829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999881413244974, "compression/movement_sparsity/importance_threshold": -8.435192297862742e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251818697896157, "compression/movement_sparsity/model_sparsity": 0.8933990263700027, "compression_loss": 105.51863098144531, "distillation_loss": 4.096807479858398, "epoch": 4.93, "learning_rate": 2.8148774302620457e-05, "loss": 109.5896, "step": 5836, "task_loss": 2.6003410816192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999885751346672, "compression/movement_sparsity/importance_threshold": -8.12661886551852e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925191814545424, "compression/movement_sparsity/model_sparsity": 0.8934086294928553, "compression_loss": 105.51811981201172, "distillation_loss": 3.8893446922302246, "epoch": 4.93, "learning_rate": 2.8144078144078146e-05, "loss": 109.4018, "step": 5837, "task_loss": 1.7869887351989746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999889982340258, "compression/movement_sparsity/importance_threshold": -7.825664138406963e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252384261167126, "compression/movement_sparsity/model_sparsity": 0.8934536398132762, "compression_loss": 105.51763916015625, "distillation_loss": 3.4137256145477295, "epoch": 4.93, "learning_rate": 2.8139381985535833e-05, "loss": 109.0225, "step": 5838, "task_loss": 1.8507084846496582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999894107564582, "compression/movement_sparsity/importance_threshold": -7.532232882551115e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252379014533366, "compression/movement_sparsity/model_sparsity": 0.8934531331737012, "compression_loss": 105.51707458496094, "distillation_loss": 3.9156370162963867, "epoch": 4.94, "learning_rate": 2.8134685826993522e-05, "loss": 109.0009, "step": 5839, "task_loss": 1.6748197078704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999898128358496, "compression/movement_sparsity/importance_threshold": -7.246229864320969e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252148281889613, "compression/movement_sparsity/model_sparsity": 0.8934308525469388, "compression_loss": 105.51659393310547, "distillation_loss": 5.036463737487793, "epoch": 4.94, "learning_rate": 2.8129989668451205e-05, "loss": 109.522, "step": 5840, "task_loss": 2.0409927368164062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999902046060852, "compression/movement_sparsity/importance_threshold": -6.967559849913041e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251896443469143, "compression/movement_sparsity/model_sparsity": 0.8934065338473407, "compression_loss": 105.51610565185547, "distillation_loss": 4.756108283996582, "epoch": 4.94, "learning_rate": 2.8125293509908895e-05, "loss": 108.6948, "step": 5841, "task_loss": 2.5615694522857666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999905862010502, "compression/movement_sparsity/importance_threshold": -6.696127605350377e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252089376501491, "compression/movement_sparsity/model_sparsity": 0.8934251643662563, "compression_loss": 105.51556396484375, "distillation_loss": 2.815988063812256, "epoch": 4.94, "learning_rate": 2.8120597351366585e-05, "loss": 109.5514, "step": 5842, "task_loss": 1.0895349979400635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999909577546297, "compression/movement_sparsity/importance_threshold": -6.431837896829495e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251641743248441, "compression/movement_sparsity/model_sparsity": 0.8933819387988835, "compression_loss": 105.51502227783203, "distillation_loss": 3.88344144821167, "epoch": 4.94, "learning_rate": 2.8115901192824275e-05, "loss": 108.6043, "step": 5843, "task_loss": 1.953026533126831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999913194007086, "compression/movement_sparsity/importance_threshold": -6.174595490720386e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251394436011673, "compression/movement_sparsity/model_sparsity": 0.8933580576516457, "compression_loss": 105.5145492553711, "distillation_loss": 5.447986602783203, "epoch": 4.94, "learning_rate": 2.8111205034281958e-05, "loss": 109.2888, "step": 5844, "task_loss": 4.2401204109191895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999916712731723, "compression/movement_sparsity/importance_threshold": -5.9243051530460944e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251440105573718, "compression/movement_sparsity/model_sparsity": 0.893362467718855, "compression_loss": 105.51392364501953, "distillation_loss": 4.707202434539795, "epoch": 4.94, "learning_rate": 2.8106508875739644e-05, "loss": 109.6583, "step": 5845, "task_loss": 2.1136560440063477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999920135059059, "compression/movement_sparsity/importance_threshold": -5.6808716500031387e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251052450883877, "compression/movement_sparsity/model_sparsity": 0.8933250339629869, "compression_loss": 105.51341247558594, "distillation_loss": 3.980879306793213, "epoch": 4.94, "learning_rate": 2.8101812717197334e-05, "loss": 109.1225, "step": 5846, "task_loss": 2.6232659816741943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999923462327946, "compression/movement_sparsity/importance_threshold": -5.4441997477880366e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9250748503850839, "compression/movement_sparsity/model_sparsity": 0.8932956834112467, "compression_loss": 105.5128173828125, "distillation_loss": 3.136040210723877, "epoch": 4.94, "learning_rate": 2.8097116558655023e-05, "loss": 108.8085, "step": 5847, "task_loss": 1.3348658084869385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999926695877234, "compression/movement_sparsity/importance_threshold": -5.2141942127707785e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9250955388159321, "compression/movement_sparsity/model_sparsity": 0.8933156611308501, "compression_loss": 105.51226806640625, "distillation_loss": 3.8981823921203613, "epoch": 4.94, "learning_rate": 2.8092420400112706e-05, "loss": 109.1951, "step": 5848, "task_loss": 2.219625234603882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999929837045775, "compression/movement_sparsity/importance_threshold": -4.9907598108009377e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925104708500844, "compression/movement_sparsity/model_sparsity": 0.8933245158088762, "compression_loss": 105.51173400878906, "distillation_loss": 4.115935325622559, "epoch": 4.94, "learning_rate": 2.8087724241570396e-05, "loss": 109.3846, "step": 5849, "task_loss": 1.4969440698623657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999993288717242, "compression/movement_sparsity/importance_threshold": -4.7738013083352404e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9250988656587025, "compression/movement_sparsity/model_sparsity": 0.8933188736863369, "compression_loss": 105.5111312866211, "distillation_loss": 3.603992462158203, "epoch": 4.94, "learning_rate": 2.8083028083028086e-05, "loss": 109.5175, "step": 5850, "task_loss": 2.138160228729248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999935847596021, "compression/movement_sparsity/importance_threshold": -4.5632234714834685e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251143193799586, "compression/movement_sparsity/model_sparsity": 0.8933337965247266, "compression_loss": 105.51058959960938, "distillation_loss": 4.465503215789795, "epoch": 4.95, "learning_rate": 2.8078331924485772e-05, "loss": 109.9128, "step": 5851, "task_loss": 3.1386260986328125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999938719655429, "compression/movement_sparsity/importance_threshold": -4.358931066355404e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9250897317462934, "compression/movement_sparsity/model_sparsity": 0.8933100535519182, "compression_loss": 105.51004028320312, "distillation_loss": 4.503993034362793, "epoch": 4.95, "learning_rate": 2.8073635765943462e-05, "loss": 110.0946, "step": 5852, "task_loss": 2.25058650970459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999941504689496, "compression/movement_sparsity/importance_threshold": -4.1608288592343e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251400875062197, "compression/movement_sparsity/model_sparsity": 0.8933586794365785, "compression_loss": 105.50950622558594, "distillation_loss": 2.9551308155059814, "epoch": 4.95, "learning_rate": 2.8068939607401145e-05, "loss": 108.5746, "step": 5853, "task_loss": 1.0619791746139526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999944204037072, "compression/movement_sparsity/importance_threshold": -3.9688216162299395e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251767543216999, "compression/movement_sparsity/model_sparsity": 0.8933940866341468, "compression_loss": 105.50897979736328, "distillation_loss": 5.385952949523926, "epoch": 4.95, "learning_rate": 2.8064243448858834e-05, "loss": 109.8884, "step": 5854, "task_loss": 2.8323798179626465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999946819037011, "compression/movement_sparsity/importance_threshold": -3.7828141034521034e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252456521622999, "compression/movement_sparsity/model_sparsity": 0.8934606176219676, "compression_loss": 105.5084457397461, "distillation_loss": 4.907528877258301, "epoch": 4.95, "learning_rate": 2.8059547290316524e-05, "loss": 109.6003, "step": 5855, "task_loss": 2.572265148162842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999949351028162, "compression/movement_sparsity/importance_threshold": -3.6027110873575185e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252733400795504, "compression/movement_sparsity/model_sparsity": 0.8934873543740826, "compression_loss": 105.50787353515625, "distillation_loss": 4.27154016494751, "epoch": 4.95, "learning_rate": 2.8054851131774214e-05, "loss": 108.9318, "step": 5856, "task_loss": 1.9891083240509033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999951801349377, "compression/movement_sparsity/importance_threshold": -3.428417333882494e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925230949663605, "compression/movement_sparsity/model_sparsity": 0.893446420199333, "compression_loss": 105.50733947753906, "distillation_loss": 2.3627848625183105, "epoch": 4.95, "learning_rate": 2.8050154973231897e-05, "loss": 109.3373, "step": 5857, "task_loss": 1.487630009651184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999954171339508, "compression/movement_sparsity/importance_threshold": -3.2598376093970205e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251999826002546, "compression/movement_sparsity/model_sparsity": 0.8934165169498746, "compression_loss": 105.50682067871094, "distillation_loss": 3.189007520675659, "epoch": 4.95, "learning_rate": 2.8045458814689583e-05, "loss": 108.5235, "step": 5858, "task_loss": 1.7824424505233765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999956462337406, "compression/movement_sparsity/importance_threshold": -3.096876679837407e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.92518795111511, "compression/movement_sparsity/model_sparsity": 0.8934048987832579, "compression_loss": 105.50626373291016, "distillation_loss": 3.9546170234680176, "epoch": 4.95, "learning_rate": 2.8040762656147273e-05, "loss": 109.6541, "step": 5859, "task_loss": 1.8208320140838623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999958675681923, "compression/movement_sparsity/importance_threshold": -2.93943931166038e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252015446662148, "compression/movement_sparsity/model_sparsity": 0.8934180253540637, "compression_loss": 105.50575256347656, "distillation_loss": 5.182888507843018, "epoch": 4.95, "learning_rate": 2.8036066497604963e-05, "loss": 109.6818, "step": 5860, "task_loss": 2.9459352493286133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999960812711909, "compression/movement_sparsity/importance_threshold": -2.7874302708889853e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252043587697769, "compression/movement_sparsity/model_sparsity": 0.8934207427845111, "compression_loss": 105.50518798828125, "distillation_loss": 5.70303201675415, "epoch": 4.95, "learning_rate": 2.8031370339062646e-05, "loss": 110.1098, "step": 5861, "task_loss": 3.4596943855285645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999962874766216, "compression/movement_sparsity/importance_threshold": -2.6407543237197406e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9251990286668437, "compression/movement_sparsity/model_sparsity": 0.893415595787011, "compression_loss": 105.50463104248047, "distillation_loss": 4.090864181518555, "epoch": 4.95, "learning_rate": 2.8026674180520335e-05, "loss": 109.2531, "step": 5862, "task_loss": 2.015904426574707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999964863183696, "compression/movement_sparsity/importance_threshold": -2.4993162364359e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925226299238227, "compression/movement_sparsity/model_sparsity": 0.893441929530373, "compression_loss": 105.50407409667969, "distillation_loss": 3.1733288764953613, "epoch": 4.96, "learning_rate": 2.8021978021978025e-05, "loss": 109.7183, "step": 5863, "task_loss": 1.965816617012024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99999667793032, "compression/movement_sparsity/importance_threshold": -2.3630207751472454e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252597703767808, "compression/movement_sparsity/model_sparsity": 0.8934742508323483, "compression_loss": 105.5035400390625, "distillation_loss": 4.225765228271484, "epoch": 4.96, "learning_rate": 2.801728186343571e-05, "loss": 109.5378, "step": 5864, "task_loss": 1.6607449054718018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999996862446358, "compression/movement_sparsity/importance_threshold": -2.2317727059635584e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252987027841119, "compression/movement_sparsity/model_sparsity": 0.8935118457917175, "compression_loss": 105.50297546386719, "distillation_loss": 3.818371295928955, "epoch": 4.96, "learning_rate": 2.8012585704893394e-05, "loss": 109.1704, "step": 5865, "task_loss": 3.4182043075561523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999970400003686, "compression/movement_sparsity/importance_threshold": -2.105476795168093e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252705140518207, "compression/movement_sparsity/model_sparsity": 0.8934846254290992, "compression_loss": 105.50240325927734, "distillation_loss": 5.982305526733398, "epoch": 4.96, "learning_rate": 2.8007889546351084e-05, "loss": 110.927, "step": 5866, "task_loss": 2.3477792739868164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999972107262372, "compression/movement_sparsity/importance_threshold": -1.9840378089573674e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9253087071607584, "compression/movement_sparsity/model_sparsity": 0.8935215064872492, "compression_loss": 105.50189971923828, "distillation_loss": 4.025646209716797, "epoch": 4.96, "learning_rate": 2.8003193387808774e-05, "loss": 109.7972, "step": 5867, "task_loss": 2.717454671859741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999973747578486, "compression/movement_sparsity/importance_threshold": -1.8673605135278992e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252735666387355, "compression/movement_sparsity/model_sparsity": 0.8934875731502626, "compression_loss": 105.50130462646484, "distillation_loss": 4.009243488311768, "epoch": 4.96, "learning_rate": 2.7998497229266464e-05, "loss": 110.0612, "step": 5868, "task_loss": 2.3266849517822266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999975322290882, "compression/movement_sparsity/importance_threshold": -1.7553496749027342e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925241657566142, "compression/movement_sparsity/model_sparsity": 0.8934567602524764, "compression_loss": 105.50069427490234, "distillation_loss": 3.788180112838745, "epoch": 4.96, "learning_rate": 2.7993801070724153e-05, "loss": 109.3287, "step": 5869, "task_loss": 2.275430917739868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999976832738409, "compression/movement_sparsity/importance_threshold": -1.6479100595385987e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252035002297072, "compression/movement_sparsity/model_sparsity": 0.893419913737934, "compression_loss": 105.5001220703125, "distillation_loss": 4.489040374755859, "epoch": 4.96, "learning_rate": 2.7989104912181836e-05, "loss": 109.5615, "step": 5870, "task_loss": 3.1939125061035156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999978280259921, "compression/movement_sparsity/importance_threshold": -1.544946433371802e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252281474842105, "compression/movement_sparsity/model_sparsity": 0.8934437142834213, "compression_loss": 105.49954223632812, "distillation_loss": 3.2595229148864746, "epoch": 4.96, "learning_rate": 2.7984408753639523e-05, "loss": 109.1501, "step": 5871, "task_loss": 1.6636496782302856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999979666194269, "compression/movement_sparsity/importance_threshold": -1.4463635627723348e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252221615520573, "compression/movement_sparsity/model_sparsity": 0.8934379339864524, "compression_loss": 105.49895477294922, "distillation_loss": 5.237864971160889, "epoch": 4.96, "learning_rate": 2.7979712595097212e-05, "loss": 109.7402, "step": 5872, "task_loss": 2.037280797958374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999980991880303, "compression/movement_sparsity/importance_threshold": -1.3520662137632422e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925250791478551, "compression/movement_sparsity/model_sparsity": 0.893465580386895, "compression_loss": 105.49842071533203, "distillation_loss": 3.561501979827881, "epoch": 4.96, "learning_rate": 2.7975016436554902e-05, "loss": 108.8019, "step": 5873, "task_loss": 1.5921545028686523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999982258656874, "compression/movement_sparsity/importance_threshold": -1.2619591526277785e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252260130582036, "compression/movement_sparsity/model_sparsity": 0.893441653181514, "compression_loss": 105.49779510498047, "distillation_loss": 4.146827697753906, "epoch": 4.96, "learning_rate": 2.7970320278012585e-05, "loss": 109.4587, "step": 5874, "task_loss": 2.699024200439453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999983467862836, "compression/movement_sparsity/importance_threshold": -1.1759471455624615e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252624413903312, "compression/movement_sparsity/model_sparsity": 0.8934768300883663, "compression_loss": 105.4971694946289, "distillation_loss": 4.513724327087402, "epoch": 4.97, "learning_rate": 2.7965624119470275e-05, "loss": 109.3529, "step": 5875, "task_loss": 2.993084192276001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999984620837039, "compression/movement_sparsity/importance_threshold": -1.093934958677073e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9252870170998287, "compression/movement_sparsity/model_sparsity": 0.8935005615466389, "compression_loss": 105.49651336669922, "distillation_loss": 3.8611950874328613, "epoch": 4.97, "learning_rate": 2.7960927960927964e-05, "loss": 109.3017, "step": 5876, "task_loss": 1.6148545742034912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999985718918334, "compression/movement_sparsity/importance_threshold": -1.015827358168131e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925336359305506, "compression/movement_sparsity/model_sparsity": 0.8935482086957567, "compression_loss": 105.4958724975586, "distillation_loss": 2.9329190254211426, "epoch": 4.97, "learning_rate": 2.795623180238565e-05, "loss": 108.8164, "step": 5877, "task_loss": 1.392734169960022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999986763445573, "compression/movement_sparsity/importance_threshold": -9.415291103188894e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9253545794336535, "compression/movement_sparsity/model_sparsity": 0.8935658029064507, "compression_loss": 105.4952392578125, "distillation_loss": 4.188737869262695, "epoch": 4.97, "learning_rate": 2.7951535643843334e-05, "loss": 109.2919, "step": 5878, "task_loss": 1.757583737373352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999987755757607, "compression/movement_sparsity/importance_threshold": -8.709449812391301e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9253397815416174, "compression/movement_sparsity/model_sparsity": 0.8935515133675297, "compression_loss": 105.49458312988281, "distillation_loss": 4.3767547607421875, "epoch": 4.97, "learning_rate": 2.7946839485301023e-05, "loss": 109.6268, "step": 5879, "task_loss": 4.1263108253479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999988697193287, "compression/movement_sparsity/importance_threshold": -8.03979737125371e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9253739323577266, "compression/movement_sparsity/model_sparsity": 0.8935844909980453, "compression_loss": 105.49398803710938, "distillation_loss": 3.0965471267700195, "epoch": 4.97, "learning_rate": 2.7942143326758713e-05, "loss": 109.2638, "step": 5880, "task_loss": 1.0954644680023193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999989589091466, "compression/movement_sparsity/importance_threshold": -7.405381440873937e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9253865004304147, "compression/movement_sparsity/model_sparsity": 0.8935966273187728, "compression_loss": 105.49333190917969, "distillation_loss": 5.65733528137207, "epoch": 4.97, "learning_rate": 2.7937447168216403e-05, "loss": 109.5631, "step": 5881, "task_loss": 3.6226072311401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999990432790993, "compression/movement_sparsity/importance_threshold": -6.805249684951886e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9253681133639202, "compression/movement_sparsity/model_sparsity": 0.8935788719045775, "compression_loss": 105.4926986694336, "distillation_loss": 3.960904121398926, "epoch": 4.97, "learning_rate": 2.7932751009674086e-05, "loss": 109.3871, "step": 5882, "task_loss": 2.2411141395568848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999991229630721, "compression/movement_sparsity/importance_threshold": -6.2384497637180125e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254340659351142, "compression/movement_sparsity/model_sparsity": 0.8936425588020571, "compression_loss": 105.49205017089844, "distillation_loss": 4.363441467285156, "epoch": 4.97, "learning_rate": 2.7928054851131776e-05, "loss": 110.0835, "step": 5883, "task_loss": 3.017935276031494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999991980949503, "compression/movement_sparsity/importance_threshold": -5.704029339137495e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254323727033099, "compression/movement_sparsity/model_sparsity": 0.8936409237379743, "compression_loss": 105.49147033691406, "distillation_loss": 3.1406593322753906, "epoch": 4.97, "learning_rate": 2.7923358692589462e-05, "loss": 108.922, "step": 5884, "task_loss": 2.2866129875183105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999992688086187, "compression/movement_sparsity/importance_threshold": -5.201036074042875e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254581646779063, "compression/movement_sparsity/model_sparsity": 0.893665829678898, "compression_loss": 105.49079132080078, "distillation_loss": 5.291512489318848, "epoch": 4.97, "learning_rate": 2.791866253404715e-05, "loss": 109.3586, "step": 5885, "task_loss": 2.612041711807251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999993352379626, "compression/movement_sparsity/importance_threshold": -4.72851762953197e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.925455684451038, "compression/movement_sparsity/model_sparsity": 0.8936634346554527, "compression_loss": 105.49015808105469, "distillation_loss": 4.438560485839844, "epoch": 4.97, "learning_rate": 2.791396637550484e-05, "loss": 109.3963, "step": 5886, "task_loss": 2.180847406387329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999993975168672, "compression/movement_sparsity/importance_threshold": -4.285521667569958e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254813929564609, "compression/movement_sparsity/model_sparsity": 0.8936882599946258, "compression_loss": 105.48946380615234, "distillation_loss": 3.483628511428833, "epoch": 4.98, "learning_rate": 2.7909270216962524e-05, "loss": 109.065, "step": 5887, "task_loss": 2.538081645965576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999994557792176, "compression/movement_sparsity/importance_threshold": -3.871095850122019e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254802959330385, "compression/movement_sparsity/model_sparsity": 0.8936872006573326, "compression_loss": 105.48883819580078, "distillation_loss": 3.7834632396698, "epoch": 4.98, "learning_rate": 2.7904574058420214e-05, "loss": 109.4389, "step": 5888, "task_loss": 2.8971168994903564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999995101588989, "compression/movement_sparsity/importance_threshold": -3.484287838285971e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254187076071992, "compression/movement_sparsity/model_sparsity": 0.8936277280799538, "compression_loss": 105.48817443847656, "distillation_loss": 4.589587688446045, "epoch": 4.98, "learning_rate": 2.78998778998779e-05, "loss": 108.8094, "step": 5889, "task_loss": 2.2439539432525635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999995607897962, "compression/movement_sparsity/importance_threshold": -3.1241452948943538e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254600963930633, "compression/movement_sparsity/model_sparsity": 0.8936676950336967, "compression_loss": 105.48754119873047, "distillation_loss": 5.118533611297607, "epoch": 4.98, "learning_rate": 2.789518174133559e-05, "loss": 110.2846, "step": 5890, "task_loss": 3.2550981044769287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999996078057948, "compression/movement_sparsity/importance_threshold": -2.789715881912347e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254480410595834, "compression/movement_sparsity/model_sparsity": 0.8936560538380084, "compression_loss": 105.4869155883789, "distillation_loss": 1.7616722583770752, "epoch": 4.98, "learning_rate": 2.7890485582793273e-05, "loss": 108.8604, "step": 5891, "task_loss": 0.6973909139633179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999996513407796, "compression/movement_sparsity/importance_threshold": -2.4800472613051294e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.925516628871825, "compression/movement_sparsity/model_sparsity": 0.8937222854478986, "compression_loss": 105.48617553710938, "distillation_loss": 2.765517234802246, "epoch": 4.98, "learning_rate": 2.7885789424250963e-05, "loss": 109.9563, "step": 5892, "task_loss": 1.4230362176895142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999691528636, "compression/movement_sparsity/importance_threshold": -2.194187094170519e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9255201226529423, "compression/movement_sparsity/model_sparsity": 0.8937256592068864, "compression_loss": 105.48552703857422, "distillation_loss": 4.658138275146484, "epoch": 4.98, "learning_rate": 2.7881093265708653e-05, "loss": 109.5776, "step": 5893, "task_loss": 2.4075183868408203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999728503249, "compression/movement_sparsity/importance_threshold": -1.9311830416063325e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9255056347892646, "compression/movement_sparsity/model_sparsity": 0.893711669045896, "compression_loss": 105.4848403930664, "distillation_loss": 3.267214298248291, "epoch": 4.98, "learning_rate": 2.7876397107166342e-05, "loss": 108.8775, "step": 5894, "task_loss": 1.3002936840057373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999997623985037, "compression/movement_sparsity/importance_threshold": -1.690082767312473e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254983610470068, "compression/movement_sparsity/model_sparsity": 0.8937046451790613, "compression_loss": 105.48420715332031, "distillation_loss": 3.4485573768615723, "epoch": 4.98, "learning_rate": 2.7871700948624025e-05, "loss": 109.2613, "step": 5895, "task_loss": 2.002229928970337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999997933482855, "compression/movement_sparsity/importance_threshold": -1.469933931519396e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9254985756820242, "compression/movement_sparsity/model_sparsity": 0.8937048524407056, "compression_loss": 105.4835433959961, "distillation_loss": 5.02451753616333, "epoch": 4.98, "learning_rate": 2.7867004790081715e-05, "loss": 109.3249, "step": 5896, "task_loss": 2.3388521671295166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998214864791, "compression/movement_sparsity/importance_threshold": -1.2697841979270041e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.925488857485401, "compression/movement_sparsity/model_sparsity": 0.8936954680940331, "compression_loss": 105.48294830322266, "distillation_loss": 5.593680381774902, "epoch": 4.98, "learning_rate": 2.78623086315394e-05, "loss": 109.5874, "step": 5897, "task_loss": 2.639195203781128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998469469701, "compression/movement_sparsity/importance_threshold": -1.088681226765753e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9255329172848156, "compression/movement_sparsity/model_sparsity": 0.893738014303794, "compression_loss": 105.4822769165039, "distillation_loss": 3.092257022857666, "epoch": 4.99, "learning_rate": 2.785761247299709e-05, "loss": 109.0723, "step": 5898, "task_loss": 1.645871877670288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998698636433, "compression/movement_sparsity/importance_threshold": -9.256726800008219e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9256146455297917, "compression/movement_sparsity/model_sparsity": 0.8938169349321297, "compression_loss": 105.48160552978516, "distillation_loss": 3.582066535949707, "epoch": 4.99, "learning_rate": 2.785291631445478e-05, "loss": 109.2822, "step": 5899, "task_loss": 3.582041025161743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998903703841, "compression/movement_sparsity/importance_threshold": -7.798062204647516e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9256575486849455, "compression/movement_sparsity/model_sparsity": 0.8938583642319184, "compression_loss": 105.4809799194336, "distillation_loss": 4.364512920379639, "epoch": 4.99, "learning_rate": 2.7848220155912464e-05, "loss": 109.6911, "step": 5900, "task_loss": 3.4463164806365967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999086010773, "compression/movement_sparsity/importance_threshold": -6.501295092553594e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.92567351514541, "compression/movement_sparsity/model_sparsity": 0.8938737821953473, "compression_loss": 105.48030090332031, "distillation_loss": 3.7093143463134766, "epoch": 4.99, "learning_rate": 2.7843523997370153e-05, "loss": 109.0227, "step": 5901, "task_loss": 2.7136361598968506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999246896084, "compression/movement_sparsity/importance_threshold": -5.356902083378245e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9256980670065722, "compression/movement_sparsity/model_sparsity": 0.8938974906245483, "compression_loss": 105.4797134399414, "distillation_loss": 4.583956718444824, "epoch": 4.99, "learning_rate": 2.783882783882784e-05, "loss": 109.6584, "step": 5902, "task_loss": 2.42334246635437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999387698624, "compression/movement_sparsity/importance_threshold": -4.3553597967732616e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9256948355571429, "compression/movement_sparsity/model_sparsity": 0.893894370185348, "compression_loss": 105.47900390625, "distillation_loss": 6.233607292175293, "epoch": 4.99, "learning_rate": 2.783413168028553e-05, "loss": 109.4069, "step": 5903, "task_loss": 3.063390016555786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999509757244, "compression/movement_sparsity/importance_threshold": -3.4871448523904336e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9255995137610619, "compression/movement_sparsity/model_sparsity": 0.8938023229862064, "compression_loss": 105.47834777832031, "distillation_loss": 6.068483352661133, "epoch": 4.99, "learning_rate": 2.7829435521743212e-05, "loss": 109.2665, "step": 5904, "task_loss": 3.62016224861145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999614410795, "compression/movement_sparsity/importance_threshold": -2.742733869881553e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9256607920585425, "compression/movement_sparsity/model_sparsity": 0.8938614961856546, "compression_loss": 105.47769165039062, "distillation_loss": 4.660430908203125, "epoch": 4.99, "learning_rate": 2.7824739363200902e-05, "loss": 109.0155, "step": 5905, "task_loss": 2.2627317905426025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999970299813, "compression/movement_sparsity/importance_threshold": -2.112603451551176e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9257105993067575, "compression/movement_sparsity/model_sparsity": 0.8939095924016683, "compression_loss": 105.47698974609375, "distillation_loss": 4.269096374511719, "epoch": 4.99, "learning_rate": 2.7820043204658592e-05, "loss": 109.433, "step": 5906, "task_loss": 1.9285924434661865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99999997768581, "compression/movement_sparsity/importance_threshold": -1.5872302430719465e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9257369994139033, "compression/movement_sparsity/model_sparsity": 0.8939350855839174, "compression_loss": 105.47640991210938, "distillation_loss": 6.886570453643799, "epoch": 4.99, "learning_rate": 2.781534704611628e-05, "loss": 110.3398, "step": 5907, "task_loss": 4.10222864151001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999837329554, "compression/movement_sparsity/importance_threshold": -1.1570908554220383e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.925730524590877, "compression/movement_sparsity/model_sparsity": 0.893928833190981, "compression_loss": 105.4756851196289, "distillation_loss": 5.8182172775268555, "epoch": 4.99, "learning_rate": 2.7810650887573965e-05, "loss": 110.4197, "step": 5908, "task_loss": 3.5819215774536133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999885751346, "compression/movement_sparsity/importance_threshold": -8.126618909060079e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9257198763091782, "compression/movement_sparsity/model_sparsity": 0.8939185507105165, "compression_loss": 105.47505187988281, "distillation_loss": 4.736020565032959, "epoch": 4.99, "learning_rate": 2.780595472903165e-05, "loss": 109.3312, "step": 5909, "task_loss": 2.7263894081115723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999923462328, "compression/movement_sparsity/importance_threshold": -5.4441996917564683e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.925729642202472, "compression/movement_sparsity/model_sparsity": 0.8939279811153322, "compression_loss": 105.47442626953125, "distillation_loss": 4.003232955932617, "epoch": 5.0, "learning_rate": 2.780125857048934e-05, "loss": 109.0231, "step": 5910, "task_loss": 2.5811238288879395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999951801349, "compression/movement_sparsity/importance_threshold": -3.4284173590359845e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9257951178069606, "compression/movement_sparsity/model_sparsity": 0.89399120743138, "compression_loss": 105.47370910644531, "distillation_loss": 2.868375539779663, "epoch": 5.0, "learning_rate": 2.779656241194703e-05, "loss": 109.2032, "step": 5911, "task_loss": 2.1665103435516357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999972107262, "compression/movement_sparsity/importance_threshold": -1.98403784720802e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9257680260980918, "compression/movement_sparsity/model_sparsity": 0.8939650464060549, "compression_loss": 105.47308349609375, "distillation_loss": 4.566390037536621, "epoch": 5.0, "learning_rate": 2.7791866253404713e-05, "loss": 110.1263, "step": 5912, "task_loss": 2.266387939453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999985718918, "compression/movement_sparsity/importance_threshold": -1.0158273527904882e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9258170225029075, "compression/movement_sparsity/model_sparsity": 0.8940123596336347, "compression_loss": 105.47239685058594, "distillation_loss": 4.651317596435547, "epoch": 5.0, "learning_rate": 2.7787170094862403e-05, "loss": 109.5477, "step": 5913, "task_loss": 2.791844129562378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999993975168, "compression/movement_sparsity/importance_threshold": -4.2855215903747634e-12, "compression/movement_sparsity/linear_layer_sparsity": 0.9259009924913993, "compression/movement_sparsity/model_sparsity": 0.8940934449946998, "compression_loss": 105.47174835205078, "distillation_loss": 4.266538619995117, "epoch": 5.0, "learning_rate": 2.7782473936320093e-05, "loss": 109.6227, "step": 5914, "task_loss": 2.1154801845550537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999998214865, "compression/movement_sparsity/importance_threshold": -1.2697846246689792e-12, "compression/movement_sparsity/linear_layer_sparsity": 0.9258960320376628, "compression/movement_sparsity/model_sparsity": 0.8940886549478093, "compression_loss": 105.47113800048828, "distillation_loss": 3.0528008937835693, "epoch": 5.0, "learning_rate": 2.777777777777778e-05, "loss": 108.7421, "step": 5915, "task_loss": 2.3330800533294678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 9.919439315795898, "epoch": 5.0, "learning_rate": 2.777308161923547e-05, "loss": 90.7798, "step": 5916, "task_loss": 4.384990692138672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 6.1190643310546875, "epoch": 5.0, "learning_rate": 2.7768385460693152e-05, "loss": 6.9749, "step": 5917, "task_loss": 3.5169315338134766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 5.478128433227539, "epoch": 5.0, "learning_rate": 2.776368930215084e-05, "loss": 5.8455, "step": 5918, "task_loss": 2.7551400661468506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 5.36722469329834, "epoch": 5.0, "learning_rate": 2.775899314360853e-05, "loss": 5.1737, "step": 5919, "task_loss": 1.4964383840560913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 4.314269065856934, "epoch": 5.0, "learning_rate": 2.775429698506622e-05, "loss": 3.8079, "step": 5920, "task_loss": 2.705627202987671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 4.364969730377197, "epoch": 5.01, "learning_rate": 2.7749600826523904e-05, "loss": 4.3701, "step": 5921, "task_loss": 1.5933417081832886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 4.428933620452881, "epoch": 5.01, "learning_rate": 2.774490466798159e-05, "loss": 3.6242, "step": 5922, "task_loss": 2.2608861923217773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.548661708831787, "epoch": 5.01, "learning_rate": 2.774020850943928e-05, "loss": 3.5326, "step": 5923, "task_loss": 1.18341064453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 3.7240376472473145, "epoch": 5.01, "learning_rate": 2.773551235089697e-05, "loss": 3.2206, "step": 5924, "task_loss": 2.1772615909576416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 3.130445718765259, "epoch": 5.01, "learning_rate": 2.7730816192354653e-05, "loss": 2.9278, "step": 5925, "task_loss": 1.5361560583114624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.1076855659484863, "epoch": 5.01, "learning_rate": 2.7726120033812342e-05, "loss": 2.703, "step": 5926, "task_loss": 1.0244978666305542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.472031593322754, "epoch": 5.01, "learning_rate": 2.7721423875270032e-05, "loss": 2.4382, "step": 5927, "task_loss": 2.0248899459838867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.666939377784729, "epoch": 5.01, "learning_rate": 2.771672771672772e-05, "loss": 1.9259, "step": 5928, "task_loss": 0.980613112449646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 3.0043325424194336, "epoch": 5.01, "learning_rate": 2.7712031558185408e-05, "loss": 2.1695, "step": 5929, "task_loss": 1.8197904825210571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.3680202960968018, "epoch": 5.01, "learning_rate": 2.770733539964309e-05, "loss": 2.386, "step": 5930, "task_loss": 1.5113407373428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6606194972991943, "epoch": 5.01, "learning_rate": 2.770263924110078e-05, "loss": 2.231, "step": 5931, "task_loss": 1.7734535932540894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.129335403442383, "epoch": 5.01, "learning_rate": 2.769794308255847e-05, "loss": 2.2411, "step": 5932, "task_loss": 0.7943421006202698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.9482619762420654, "epoch": 5.02, "learning_rate": 2.7693246924016157e-05, "loss": 2.0524, "step": 5933, "task_loss": 0.9542220234870911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.4601807594299316, "epoch": 5.02, "learning_rate": 2.7688550765473843e-05, "loss": 2.1397, "step": 5934, "task_loss": 2.358696699142456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.4494423866271973, "epoch": 5.02, "learning_rate": 2.768385460693153e-05, "loss": 2.1025, "step": 5935, "task_loss": 1.0936882495880127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6064245700836182, "epoch": 5.02, "learning_rate": 2.767915844838922e-05, "loss": 1.8395, "step": 5936, "task_loss": 0.6748051643371582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5972816944122314, "epoch": 5.02, "learning_rate": 2.767446228984691e-05, "loss": 1.5811, "step": 5937, "task_loss": 1.0513757467269897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8620073795318604, "epoch": 5.02, "learning_rate": 2.7669766131304592e-05, "loss": 1.945, "step": 5938, "task_loss": 1.295775055885315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6171507835388184, "epoch": 5.02, "learning_rate": 2.7665069972762282e-05, "loss": 1.927, "step": 5939, "task_loss": 2.2351043224334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8055548667907715, "epoch": 5.02, "learning_rate": 2.7660373814219968e-05, "loss": 2.0168, "step": 5940, "task_loss": 0.9178979396820068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.3216819763183594, "epoch": 5.02, "learning_rate": 2.7655677655677658e-05, "loss": 2.5866, "step": 5941, "task_loss": 1.8371474742889404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.209804058074951, "epoch": 5.02, "learning_rate": 2.765098149713534e-05, "loss": 1.7082, "step": 5942, "task_loss": 1.8463141918182373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.1148018836975098, "epoch": 5.02, "learning_rate": 2.764628533859303e-05, "loss": 2.0813, "step": 5943, "task_loss": 1.243958592414856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.9449775218963623, "epoch": 5.02, "learning_rate": 2.764158918005072e-05, "loss": 2.4076, "step": 5944, "task_loss": 1.3000142574310303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.447014808654785, "epoch": 5.03, "learning_rate": 2.763689302150841e-05, "loss": 2.1294, "step": 5945, "task_loss": 1.2328853607177734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0358604192733765, "epoch": 5.03, "learning_rate": 2.7632196862966096e-05, "loss": 1.3859, "step": 5946, "task_loss": 0.36124706268310547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1265978813171387, "epoch": 5.03, "learning_rate": 2.7627500704423783e-05, "loss": 1.6122, "step": 5947, "task_loss": 0.6532024145126343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.569379210472107, "epoch": 5.03, "learning_rate": 2.762280454588147e-05, "loss": 1.2932, "step": 5948, "task_loss": 1.1504589319229126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1818127632141113, "epoch": 5.03, "learning_rate": 2.761810838733916e-05, "loss": 1.7391, "step": 5949, "task_loss": 0.983351469039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.165534019470215, "epoch": 5.03, "learning_rate": 2.761341222879685e-05, "loss": 1.5316, "step": 5950, "task_loss": 1.437286138534546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7277686595916748, "epoch": 5.03, "learning_rate": 2.760871607025453e-05, "loss": 1.9358, "step": 5951, "task_loss": 1.729114055633545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6413116455078125, "epoch": 5.03, "learning_rate": 2.760401991171222e-05, "loss": 1.853, "step": 5952, "task_loss": 1.5032126903533936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5888975858688354, "epoch": 5.03, "learning_rate": 2.7599323753169907e-05, "loss": 1.8079, "step": 5953, "task_loss": 1.311579942703247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.0708746910095215, "epoch": 5.03, "learning_rate": 2.7594627594627597e-05, "loss": 1.6193, "step": 5954, "task_loss": 1.115846037864685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.914422869682312, "epoch": 5.03, "learning_rate": 2.758993143608528e-05, "loss": 1.6965, "step": 5955, "task_loss": 1.799185037612915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5467967987060547, "epoch": 5.03, "learning_rate": 2.758523527754297e-05, "loss": 1.4383, "step": 5956, "task_loss": 1.2228282690048218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0317689180374146, "epoch": 5.04, "learning_rate": 2.758053911900066e-05, "loss": 1.7077, "step": 5957, "task_loss": 0.6057091355323792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.9418132305145264, "epoch": 5.04, "learning_rate": 2.757584296045835e-05, "loss": 1.7798, "step": 5958, "task_loss": 2.1772773265838623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.3347315788269043, "epoch": 5.04, "learning_rate": 2.7571146801916032e-05, "loss": 1.9133, "step": 5959, "task_loss": 2.3602144718170166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5760685205459595, "epoch": 5.04, "learning_rate": 2.756645064337372e-05, "loss": 1.694, "step": 5960, "task_loss": 1.114931344985962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.307918667793274, "epoch": 5.04, "learning_rate": 2.756175448483141e-05, "loss": 1.833, "step": 5961, "task_loss": 0.29417896270751953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8929929733276367, "epoch": 5.04, "learning_rate": 2.7557058326289098e-05, "loss": 1.9011, "step": 5962, "task_loss": 1.1024556159973145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0432039499282837, "epoch": 5.04, "learning_rate": 2.7552362167746788e-05, "loss": 1.8086, "step": 5963, "task_loss": 1.3864662647247314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.161791205406189, "epoch": 5.04, "learning_rate": 2.754766600920447e-05, "loss": 1.3358, "step": 5964, "task_loss": 0.743415892124176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.408394455909729, "epoch": 5.04, "learning_rate": 2.754296985066216e-05, "loss": 1.5226, "step": 5965, "task_loss": 1.027267575263977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.709733009338379, "epoch": 5.04, "learning_rate": 2.7538273692119847e-05, "loss": 1.8303, "step": 5966, "task_loss": 1.0922616720199585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.565920352935791, "epoch": 5.04, "learning_rate": 2.7533577533577537e-05, "loss": 1.5923, "step": 5967, "task_loss": 1.0915260314941406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.3394200801849365, "epoch": 5.04, "learning_rate": 2.752888137503522e-05, "loss": 1.9213, "step": 5968, "task_loss": 1.8718047142028809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.280888557434082, "epoch": 5.05, "learning_rate": 2.752418521649291e-05, "loss": 1.6482, "step": 5969, "task_loss": 2.506948709487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7973328828811646, "epoch": 5.05, "learning_rate": 2.75194890579506e-05, "loss": 1.546, "step": 5970, "task_loss": 1.493219256401062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9458789825439453, "epoch": 5.05, "learning_rate": 2.751479289940829e-05, "loss": 1.1198, "step": 5971, "task_loss": 0.6893848180770874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.640953779220581, "epoch": 5.05, "learning_rate": 2.751009674086597e-05, "loss": 1.584, "step": 5972, "task_loss": 1.612959623336792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9503688812255859, "epoch": 5.05, "learning_rate": 2.7505400582323658e-05, "loss": 1.0321, "step": 5973, "task_loss": 0.3642536699771881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5299516916275024, "epoch": 5.05, "learning_rate": 2.7500704423781348e-05, "loss": 1.8837, "step": 5974, "task_loss": 1.327739953994751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7730966806411743, "epoch": 5.05, "learning_rate": 2.7496008265239037e-05, "loss": 1.5001, "step": 5975, "task_loss": 0.8338232636451721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2398841381072998, "epoch": 5.05, "learning_rate": 2.7491312106696727e-05, "loss": 1.2694, "step": 5976, "task_loss": 1.9302295446395874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8393527269363403, "epoch": 5.05, "learning_rate": 2.748661594815441e-05, "loss": 1.6806, "step": 5977, "task_loss": 2.176180601119995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6903536319732666, "epoch": 5.05, "learning_rate": 2.74819197896121e-05, "loss": 1.6391, "step": 5978, "task_loss": 1.0991424322128296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7782996892929077, "epoch": 5.05, "learning_rate": 2.7477223631069786e-05, "loss": 1.4931, "step": 5979, "task_loss": 1.0576121807098389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0298763513565063, "epoch": 5.05, "learning_rate": 2.7472527472527476e-05, "loss": 1.0925, "step": 5980, "task_loss": 0.870347797870636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.616264820098877, "epoch": 5.06, "learning_rate": 2.746783131398516e-05, "loss": 1.5499, "step": 5981, "task_loss": 1.4294523000717163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6099634170532227, "epoch": 5.06, "learning_rate": 2.746313515544285e-05, "loss": 1.5963, "step": 5982, "task_loss": 0.6542138457298279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.9700772762298584, "epoch": 5.06, "learning_rate": 2.7458438996900538e-05, "loss": 1.4503, "step": 5983, "task_loss": 0.9415558576583862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8856480121612549, "epoch": 5.06, "learning_rate": 2.7453742838358225e-05, "loss": 1.8304, "step": 5984, "task_loss": 1.0777084827423096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.764885425567627, "epoch": 5.06, "learning_rate": 2.744904667981591e-05, "loss": 1.4483, "step": 5985, "task_loss": 0.8932296633720398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2568703889846802, "epoch": 5.06, "learning_rate": 2.7444350521273597e-05, "loss": 1.2005, "step": 5986, "task_loss": 1.141342043876648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7585270404815674, "epoch": 5.06, "learning_rate": 2.7439654362731287e-05, "loss": 1.7757, "step": 5987, "task_loss": 1.3056159019470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.184748649597168, "epoch": 5.06, "learning_rate": 2.7434958204188977e-05, "loss": 1.409, "step": 5988, "task_loss": 1.2990970611572266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3023464679718018, "epoch": 5.06, "learning_rate": 2.743026204564666e-05, "loss": 1.632, "step": 5989, "task_loss": 1.1137580871582031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9616519808769226, "epoch": 5.06, "learning_rate": 2.742556588710435e-05, "loss": 1.3768, "step": 5990, "task_loss": 0.7536516189575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5510261058807373, "epoch": 5.06, "learning_rate": 2.742086972856204e-05, "loss": 1.6903, "step": 5991, "task_loss": 2.349590539932251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6017621755599976, "epoch": 5.07, "learning_rate": 2.7416173570019726e-05, "loss": 1.059, "step": 5992, "task_loss": 0.2865601181983948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.0374913215637207, "epoch": 5.07, "learning_rate": 2.7411477411477415e-05, "loss": 1.5776, "step": 5993, "task_loss": 1.500483512878418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5447111129760742, "epoch": 5.07, "learning_rate": 2.7406781252935098e-05, "loss": 1.2458, "step": 5994, "task_loss": 1.1184951066970825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8205779790878296, "epoch": 5.07, "learning_rate": 2.7402085094392788e-05, "loss": 1.574, "step": 5995, "task_loss": 1.7084050178527832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9996081590652466, "epoch": 5.07, "learning_rate": 2.7397388935850478e-05, "loss": 1.5217, "step": 5996, "task_loss": 0.7261297106742859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7450529336929321, "epoch": 5.07, "learning_rate": 2.7392692777308164e-05, "loss": 1.651, "step": 5997, "task_loss": 1.1567139625549316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0442901849746704, "epoch": 5.07, "learning_rate": 2.738799661876585e-05, "loss": 1.1114, "step": 5998, "task_loss": 0.513629138469696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.789902925491333, "epoch": 5.07, "learning_rate": 2.7383300460223537e-05, "loss": 1.4087, "step": 5999, "task_loss": 1.2832669019699097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5780248641967773, "epoch": 5.07, "learning_rate": 2.7378604301681226e-05, "loss": 1.5584, "step": 6000, "task_loss": 0.7329779863357544 }, { "epoch": 5.07, "eval_accuracy": 0.8374257425742574, "eval_loss": 0.9255489110946655, "eval_runtime": 223.8092, "eval_samples_per_second": 112.819, "eval_steps_per_second": 0.885, "step": 6000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.362786054611206, "epoch": 5.07, "learning_rate": 2.7373908143138916e-05, "loss": 1.0648, "step": 6001, "task_loss": 1.1526740789413452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.9065369367599487, "epoch": 5.07, "learning_rate": 2.73692119845966e-05, "loss": 1.8937, "step": 6002, "task_loss": 2.4388620853424072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.74306058883667, "epoch": 5.07, "learning_rate": 2.736451582605429e-05, "loss": 1.3321, "step": 6003, "task_loss": 1.244826078414917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5855720043182373, "epoch": 5.08, "learning_rate": 2.7359819667511975e-05, "loss": 1.3418, "step": 6004, "task_loss": 1.0745313167572021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9021191596984863, "epoch": 5.08, "learning_rate": 2.7355123508969665e-05, "loss": 1.1169, "step": 6005, "task_loss": 0.7877470850944519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.472015142440796, "epoch": 5.08, "learning_rate": 2.7350427350427355e-05, "loss": 1.1974, "step": 6006, "task_loss": 1.3068773746490479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2446209192276, "epoch": 5.08, "learning_rate": 2.7345731191885038e-05, "loss": 1.2497, "step": 6007, "task_loss": 1.1026158332824707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6563444137573242, "epoch": 5.08, "learning_rate": 2.7341035033342727e-05, "loss": 1.3805, "step": 6008, "task_loss": 1.0620962381362915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.1654181480407715, "epoch": 5.08, "learning_rate": 2.7336338874800417e-05, "loss": 1.5632, "step": 6009, "task_loss": 1.3349390029907227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4124610424041748, "epoch": 5.08, "learning_rate": 2.7331642716258103e-05, "loss": 1.3428, "step": 6010, "task_loss": 0.9180528521537781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6611921787261963, "epoch": 5.08, "learning_rate": 2.7326946557715786e-05, "loss": 1.7189, "step": 6011, "task_loss": 1.5420823097229004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3303245306015015, "epoch": 5.08, "learning_rate": 2.7322250399173476e-05, "loss": 1.6003, "step": 6012, "task_loss": 1.1020996570587158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.317387342453003, "epoch": 5.08, "learning_rate": 2.7317554240631166e-05, "loss": 1.6846, "step": 6013, "task_loss": 2.2690353393554688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8045129776000977, "epoch": 5.08, "learning_rate": 2.7312858082088855e-05, "loss": 1.2332, "step": 6014, "task_loss": 0.9895821213722229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5257909297943115, "epoch": 5.08, "learning_rate": 2.730816192354654e-05, "loss": 1.4609, "step": 6015, "task_loss": 0.8433331251144409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2777395248413086, "epoch": 5.09, "learning_rate": 2.7303465765004228e-05, "loss": 1.2134, "step": 6016, "task_loss": 2.043116807937622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.689359188079834, "epoch": 5.09, "learning_rate": 2.7298769606461915e-05, "loss": 1.7203, "step": 6017, "task_loss": 0.8712148666381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2466418743133545, "epoch": 5.09, "learning_rate": 2.7294073447919604e-05, "loss": 1.354, "step": 6018, "task_loss": 1.322817325592041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9423801898956299, "epoch": 5.09, "learning_rate": 2.7289377289377287e-05, "loss": 1.2677, "step": 6019, "task_loss": 0.733288586139679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.860029935836792, "epoch": 5.09, "learning_rate": 2.7284681130834977e-05, "loss": 1.2436, "step": 6020, "task_loss": 0.37402164936065674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6677806377410889, "epoch": 5.09, "learning_rate": 2.7279984972292667e-05, "loss": 1.5159, "step": 6021, "task_loss": 1.8308193683624268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.589179277420044, "epoch": 5.09, "learning_rate": 2.7275288813750356e-05, "loss": 1.4409, "step": 6022, "task_loss": 1.1338530778884888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.620370626449585, "epoch": 5.09, "learning_rate": 2.7270592655208043e-05, "loss": 1.5878, "step": 6023, "task_loss": 1.515231728553772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7444946765899658, "epoch": 5.09, "learning_rate": 2.7265896496665726e-05, "loss": 1.1136, "step": 6024, "task_loss": 0.5273271799087524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4457865953445435, "epoch": 5.09, "learning_rate": 2.7261200338123415e-05, "loss": 1.1344, "step": 6025, "task_loss": 1.678890585899353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6449804306030273, "epoch": 5.09, "learning_rate": 2.7256504179581105e-05, "loss": 1.5776, "step": 6026, "task_loss": 0.8834776878356934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4237383604049683, "epoch": 5.09, "learning_rate": 2.7251808021038795e-05, "loss": 1.3531, "step": 6027, "task_loss": 1.1187310218811035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3349690437316895, "epoch": 5.1, "learning_rate": 2.7247111862496478e-05, "loss": 1.4212, "step": 6028, "task_loss": 2.0402426719665527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1862348318099976, "epoch": 5.1, "learning_rate": 2.7242415703954168e-05, "loss": 1.4291, "step": 6029, "task_loss": 0.8543505072593689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3424272537231445, "epoch": 5.1, "learning_rate": 2.7237719545411854e-05, "loss": 1.2753, "step": 6030, "task_loss": 1.5949455499649048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7791222929954529, "epoch": 5.1, "learning_rate": 2.7233023386869544e-05, "loss": 1.0573, "step": 6031, "task_loss": 0.6386023163795471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.1770803928375244, "epoch": 5.1, "learning_rate": 2.7228327228327227e-05, "loss": 1.5336, "step": 6032, "task_loss": 1.6900779008865356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.071692943572998, "epoch": 5.1, "learning_rate": 2.7223631069784916e-05, "loss": 1.4281, "step": 6033, "task_loss": 0.9926520586013794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9353287220001221, "epoch": 5.1, "learning_rate": 2.7218934911242606e-05, "loss": 0.8202, "step": 6034, "task_loss": 0.9254095554351807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7034028768539429, "epoch": 5.1, "learning_rate": 2.7214238752700292e-05, "loss": 1.4157, "step": 6035, "task_loss": 0.22637586295604706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1188673973083496, "epoch": 5.1, "learning_rate": 2.720954259415798e-05, "loss": 1.263, "step": 6036, "task_loss": 0.9515078067779541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.460943341255188, "epoch": 5.1, "learning_rate": 2.7204846435615665e-05, "loss": 1.2074, "step": 6037, "task_loss": 0.7580053210258484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0305057764053345, "epoch": 5.1, "learning_rate": 2.7200150277073355e-05, "loss": 1.185, "step": 6038, "task_loss": 1.5516941547393799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8159925937652588, "epoch": 5.1, "learning_rate": 2.7195454118531044e-05, "loss": 1.1154, "step": 6039, "task_loss": 1.400212287902832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2962853908538818, "epoch": 5.11, "learning_rate": 2.7190757959988734e-05, "loss": 0.9578, "step": 6040, "task_loss": 1.596772313117981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2853773832321167, "epoch": 5.11, "learning_rate": 2.7186061801446417e-05, "loss": 1.2024, "step": 6041, "task_loss": 0.7133489847183228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5636565685272217, "epoch": 5.11, "learning_rate": 2.7181365642904107e-05, "loss": 1.6201, "step": 6042, "task_loss": 1.6973786354064941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.878318190574646, "epoch": 5.11, "learning_rate": 2.7176669484361793e-05, "loss": 1.3674, "step": 6043, "task_loss": 0.7737702131271362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2043254375457764, "epoch": 5.11, "learning_rate": 2.7171973325819483e-05, "loss": 1.5075, "step": 6044, "task_loss": 1.0946475267410278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7701714038848877, "epoch": 5.11, "learning_rate": 2.7167277167277166e-05, "loss": 1.4057, "step": 6045, "task_loss": 0.8207547068595886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3701739311218262, "epoch": 5.11, "learning_rate": 2.7162581008734856e-05, "loss": 1.3041, "step": 6046, "task_loss": 1.885419487953186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0608551502227783, "epoch": 5.11, "learning_rate": 2.7157884850192545e-05, "loss": 1.194, "step": 6047, "task_loss": 0.6708285808563232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0865159034729004, "epoch": 5.11, "learning_rate": 2.715318869165023e-05, "loss": 1.2648, "step": 6048, "task_loss": 0.7881240248680115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1132276058197021, "epoch": 5.11, "learning_rate": 2.7148492533107918e-05, "loss": 1.092, "step": 6049, "task_loss": 1.132396936416626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2389557361602783, "epoch": 5.11, "learning_rate": 2.7143796374565604e-05, "loss": 1.1837, "step": 6050, "task_loss": 1.2366503477096558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.000892162322998, "epoch": 5.11, "learning_rate": 2.7139100216023294e-05, "loss": 1.1622, "step": 6051, "task_loss": 0.7678613662719727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.325026750564575, "epoch": 5.12, "learning_rate": 2.7134404057480984e-05, "loss": 1.2763, "step": 6052, "task_loss": 1.7020881175994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9088505506515503, "epoch": 5.12, "learning_rate": 2.7129707898938674e-05, "loss": 1.1749, "step": 6053, "task_loss": 1.719702124595642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4270869493484497, "epoch": 5.12, "learning_rate": 2.7125011740396357e-05, "loss": 1.1982, "step": 6054, "task_loss": 1.4834192991256714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.52190101146698, "epoch": 5.12, "learning_rate": 2.7120315581854043e-05, "loss": 1.2132, "step": 6055, "task_loss": 1.7077561616897583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6888902187347412, "epoch": 5.12, "learning_rate": 2.7115619423311733e-05, "loss": 1.3104, "step": 6056, "task_loss": 0.6381322145462036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8510379791259766, "epoch": 5.12, "learning_rate": 2.7110923264769422e-05, "loss": 1.2609, "step": 6057, "task_loss": 1.5467699766159058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0921202898025513, "epoch": 5.12, "learning_rate": 2.7106227106227105e-05, "loss": 1.155, "step": 6058, "task_loss": 1.4595752954483032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9477813243865967, "epoch": 5.12, "learning_rate": 2.7101530947684795e-05, "loss": 1.0996, "step": 6059, "task_loss": 1.5011719465255737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4506008625030518, "epoch": 5.12, "learning_rate": 2.7096834789142485e-05, "loss": 1.3774, "step": 6060, "task_loss": 0.46192988753318787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4952435493469238, "epoch": 5.12, "learning_rate": 2.709213863060017e-05, "loss": 1.3799, "step": 6061, "task_loss": 1.7630170583724976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3676927089691162, "epoch": 5.12, "learning_rate": 2.7087442472057854e-05, "loss": 0.9984, "step": 6062, "task_loss": 0.8018324971199036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8389025926589966, "epoch": 5.13, "learning_rate": 2.7082746313515544e-05, "loss": 1.0459, "step": 6063, "task_loss": 0.45963773131370544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.957067608833313, "epoch": 5.13, "learning_rate": 2.7078050154973233e-05, "loss": 1.2174, "step": 6064, "task_loss": 0.5281568765640259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5009479522705078, "epoch": 5.13, "learning_rate": 2.7073353996430923e-05, "loss": 1.0033, "step": 6065, "task_loss": 0.8884579539299011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.1277551651000977, "epoch": 5.13, "learning_rate": 2.7068657837888606e-05, "loss": 1.4719, "step": 6066, "task_loss": 1.5420527458190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.156897783279419, "epoch": 5.13, "learning_rate": 2.7063961679346296e-05, "loss": 1.4256, "step": 6067, "task_loss": 1.4441157579421997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7356464266777039, "epoch": 5.13, "learning_rate": 2.7059265520803982e-05, "loss": 1.3692, "step": 6068, "task_loss": 0.9077891707420349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2481683492660522, "epoch": 5.13, "learning_rate": 2.7054569362261672e-05, "loss": 1.3546, "step": 6069, "task_loss": 0.7295634150505066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0509686470031738, "epoch": 5.13, "learning_rate": 2.704987320371936e-05, "loss": 1.2991, "step": 6070, "task_loss": 1.0821237564086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3256714344024658, "epoch": 5.13, "learning_rate": 2.7045177045177045e-05, "loss": 1.1903, "step": 6071, "task_loss": 1.8389370441436768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1431584358215332, "epoch": 5.13, "learning_rate": 2.7040480886634734e-05, "loss": 1.2784, "step": 6072, "task_loss": 1.5049108266830444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8729743957519531, "epoch": 5.13, "learning_rate": 2.7035784728092424e-05, "loss": 0.9773, "step": 6073, "task_loss": 0.08920536190271378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1995563507080078, "epoch": 5.13, "learning_rate": 2.703108856955011e-05, "loss": 1.2759, "step": 6074, "task_loss": 0.6119864583015442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.447853446006775, "epoch": 5.14, "learning_rate": 2.7026392411007793e-05, "loss": 1.4549, "step": 6075, "task_loss": 1.1879305839538574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2793512344360352, "epoch": 5.14, "learning_rate": 2.7021696252465483e-05, "loss": 1.2952, "step": 6076, "task_loss": 1.329996943473816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7206697463989258, "epoch": 5.14, "learning_rate": 2.7017000093923173e-05, "loss": 1.2263, "step": 6077, "task_loss": 2.1903085708618164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3322277069091797, "epoch": 5.14, "learning_rate": 2.7012303935380863e-05, "loss": 1.5121, "step": 6078, "task_loss": 1.51104736328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.273158073425293, "epoch": 5.14, "learning_rate": 2.7007607776838545e-05, "loss": 1.1185, "step": 6079, "task_loss": 1.409136176109314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6894508600234985, "epoch": 5.14, "learning_rate": 2.7002911618296235e-05, "loss": 1.2511, "step": 6080, "task_loss": 0.9176352620124817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7033214569091797, "epoch": 5.14, "learning_rate": 2.699821545975392e-05, "loss": 1.3614, "step": 6081, "task_loss": 0.39124035835266113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4722325801849365, "epoch": 5.14, "learning_rate": 2.699351930121161e-05, "loss": 1.2819, "step": 6082, "task_loss": 0.7369562387466431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.050175666809082, "epoch": 5.14, "learning_rate": 2.69888231426693e-05, "loss": 1.3798, "step": 6083, "task_loss": 1.1925793886184692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8883548378944397, "epoch": 5.14, "learning_rate": 2.6984126984126984e-05, "loss": 1.1472, "step": 6084, "task_loss": 0.6385883688926697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.414665699005127, "epoch": 5.14, "learning_rate": 2.6979430825584674e-05, "loss": 1.4032, "step": 6085, "task_loss": 1.6097973585128784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5626944303512573, "epoch": 5.14, "learning_rate": 2.6974734667042363e-05, "loss": 1.5437, "step": 6086, "task_loss": 0.6868955492973328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7533105611801147, "epoch": 5.15, "learning_rate": 2.697003850850005e-05, "loss": 1.4613, "step": 6087, "task_loss": 0.8082822561264038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3132038116455078, "epoch": 5.15, "learning_rate": 2.6965342349957733e-05, "loss": 1.3822, "step": 6088, "task_loss": 0.6183616518974304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0502779483795166, "epoch": 5.15, "learning_rate": 2.6960646191415422e-05, "loss": 0.9537, "step": 6089, "task_loss": 0.7432623505592346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.794998049736023, "epoch": 5.15, "learning_rate": 2.6955950032873112e-05, "loss": 1.2846, "step": 6090, "task_loss": 1.6254698038101196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.137148380279541, "epoch": 5.15, "learning_rate": 2.6951253874330802e-05, "loss": 1.152, "step": 6091, "task_loss": 1.0698273181915283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4182372093200684, "epoch": 5.15, "learning_rate": 2.6946557715788485e-05, "loss": 1.195, "step": 6092, "task_loss": 0.8053495287895203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3108047246932983, "epoch": 5.15, "learning_rate": 2.6941861557246175e-05, "loss": 1.4148, "step": 6093, "task_loss": 1.6493083238601685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3582600355148315, "epoch": 5.15, "learning_rate": 2.693716539870386e-05, "loss": 1.2086, "step": 6094, "task_loss": 0.9708290696144104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7206084728240967, "epoch": 5.15, "learning_rate": 2.693246924016155e-05, "loss": 1.112, "step": 6095, "task_loss": 1.2427935600280762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.432945966720581, "epoch": 5.15, "learning_rate": 2.6927773081619234e-05, "loss": 1.4117, "step": 6096, "task_loss": 1.5585262775421143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.309180498123169, "epoch": 5.15, "learning_rate": 2.6923076923076923e-05, "loss": 1.1311, "step": 6097, "task_loss": 1.3152461051940918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9195569753646851, "epoch": 5.15, "learning_rate": 2.6918380764534613e-05, "loss": 1.2604, "step": 6098, "task_loss": 1.5550198554992676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7592365741729736, "epoch": 5.16, "learning_rate": 2.69136846059923e-05, "loss": 1.2064, "step": 6099, "task_loss": 1.3942025899887085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9046814441680908, "epoch": 5.16, "learning_rate": 2.690898844744999e-05, "loss": 1.3011, "step": 6100, "task_loss": 0.2835928201675415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4483760595321655, "epoch": 5.16, "learning_rate": 2.6904292288907672e-05, "loss": 1.1225, "step": 6101, "task_loss": 3.2307331562042236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0087075233459473, "epoch": 5.16, "learning_rate": 2.6899596130365362e-05, "loss": 0.964, "step": 6102, "task_loss": 0.2366395890712738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3131179809570312, "epoch": 5.16, "learning_rate": 2.689489997182305e-05, "loss": 1.2097, "step": 6103, "task_loss": 0.46171098947525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1442182064056396, "epoch": 5.16, "learning_rate": 2.689020381328074e-05, "loss": 0.9472, "step": 6104, "task_loss": 0.6807703971862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.285220980644226, "epoch": 5.16, "learning_rate": 2.6885507654738424e-05, "loss": 1.1967, "step": 6105, "task_loss": 0.5973101854324341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5201876163482666, "epoch": 5.16, "learning_rate": 2.688081149619611e-05, "loss": 1.1791, "step": 6106, "task_loss": 1.6043528318405151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6993893384933472, "epoch": 5.16, "learning_rate": 2.68761153376538e-05, "loss": 1.3175, "step": 6107, "task_loss": 0.7902186512947083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7791996002197266, "epoch": 5.16, "learning_rate": 2.687141917911149e-05, "loss": 1.1058, "step": 6108, "task_loss": 0.8139909505844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8172497749328613, "epoch": 5.16, "learning_rate": 2.6866723020569173e-05, "loss": 1.0439, "step": 6109, "task_loss": 0.9940793514251709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2609145641326904, "epoch": 5.16, "learning_rate": 2.6862026862026863e-05, "loss": 1.198, "step": 6110, "task_loss": 0.7571166157722473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5356436967849731, "epoch": 5.17, "learning_rate": 2.6857330703484552e-05, "loss": 1.4644, "step": 6111, "task_loss": 1.0309182405471802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8320374488830566, "epoch": 5.17, "learning_rate": 2.685263454494224e-05, "loss": 1.2143, "step": 6112, "task_loss": 1.1303656101226807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.012213945388794, "epoch": 5.17, "learning_rate": 2.6847938386399925e-05, "loss": 1.142, "step": 6113, "task_loss": 1.1425424814224243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4068005084991455, "epoch": 5.17, "learning_rate": 2.684324222785761e-05, "loss": 1.2888, "step": 6114, "task_loss": 1.253973126411438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3476769924163818, "epoch": 5.17, "learning_rate": 2.68385460693153e-05, "loss": 1.3247, "step": 6115, "task_loss": 1.1544463634490967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0267382860183716, "epoch": 5.17, "learning_rate": 2.683384991077299e-05, "loss": 1.1744, "step": 6116, "task_loss": 0.6073408126831055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3857049942016602, "epoch": 5.17, "learning_rate": 2.682915375223068e-05, "loss": 1.2236, "step": 6117, "task_loss": 0.47395220398902893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.319191813468933, "epoch": 5.17, "learning_rate": 2.6824457593688364e-05, "loss": 1.2468, "step": 6118, "task_loss": 1.187980055809021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.567018985748291, "epoch": 5.17, "learning_rate": 2.681976143514605e-05, "loss": 1.2403, "step": 6119, "task_loss": 1.0020809173583984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8874304294586182, "epoch": 5.17, "learning_rate": 2.681506527660374e-05, "loss": 1.0965, "step": 6120, "task_loss": 0.3074057698249817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.511888027191162, "epoch": 5.17, "learning_rate": 2.681036911806143e-05, "loss": 1.4348, "step": 6121, "task_loss": 1.5631098747253418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.056542158126831, "epoch": 5.17, "learning_rate": 2.6805672959519112e-05, "loss": 1.2357, "step": 6122, "task_loss": 0.9562922120094299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4898769855499268, "epoch": 5.18, "learning_rate": 2.6800976800976802e-05, "loss": 1.2645, "step": 6123, "task_loss": 1.0950974225997925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.753301739692688, "epoch": 5.18, "learning_rate": 2.6796280642434492e-05, "loss": 1.0705, "step": 6124, "task_loss": 0.65256667137146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7636306285858154, "epoch": 5.18, "learning_rate": 2.6791584483892178e-05, "loss": 1.0731, "step": 6125, "task_loss": 0.409709632396698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2290489673614502, "epoch": 5.18, "learning_rate": 2.678688832534986e-05, "loss": 1.0879, "step": 6126, "task_loss": 0.7039706707000732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.143861174583435, "epoch": 5.18, "learning_rate": 2.678219216680755e-05, "loss": 1.2793, "step": 6127, "task_loss": 0.2725723385810852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8067092895507812, "epoch": 5.18, "learning_rate": 2.677749600826524e-05, "loss": 1.7508, "step": 6128, "task_loss": 0.920843780040741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7941699624061584, "epoch": 5.18, "learning_rate": 2.677279984972293e-05, "loss": 1.1692, "step": 6129, "task_loss": 0.6736214756965637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1768003702163696, "epoch": 5.18, "learning_rate": 2.6768103691180617e-05, "loss": 1.0645, "step": 6130, "task_loss": 1.2507671117782593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7788869738578796, "epoch": 5.18, "learning_rate": 2.6763407532638303e-05, "loss": 1.1327, "step": 6131, "task_loss": 0.9569858908653259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0733391046524048, "epoch": 5.18, "learning_rate": 2.675871137409599e-05, "loss": 1.0941, "step": 6132, "task_loss": 1.167495846748352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0850999355316162, "epoch": 5.18, "learning_rate": 2.675401521555368e-05, "loss": 1.0459, "step": 6133, "task_loss": 0.6666683554649353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8704250454902649, "epoch": 5.19, "learning_rate": 2.674931905701137e-05, "loss": 1.1289, "step": 6134, "task_loss": 1.6516984701156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0570610761642456, "epoch": 5.19, "learning_rate": 2.674462289846905e-05, "loss": 1.0438, "step": 6135, "task_loss": 0.5692971348762512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7103610634803772, "epoch": 5.19, "learning_rate": 2.673992673992674e-05, "loss": 0.7573, "step": 6136, "task_loss": 0.7238109707832336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5703034400939941, "epoch": 5.19, "learning_rate": 2.673523058138443e-05, "loss": 1.1756, "step": 6137, "task_loss": 1.9297771453857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5002548694610596, "epoch": 5.19, "learning_rate": 2.6730534422842117e-05, "loss": 1.05, "step": 6138, "task_loss": 1.0961856842041016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6165812015533447, "epoch": 5.19, "learning_rate": 2.67258382642998e-05, "loss": 1.2428, "step": 6139, "task_loss": 0.37295961380004883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9800431728363037, "epoch": 5.19, "learning_rate": 2.672114210575749e-05, "loss": 1.0707, "step": 6140, "task_loss": 0.40803849697113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5532822608947754, "epoch": 5.19, "learning_rate": 2.671644594721518e-05, "loss": 1.191, "step": 6141, "task_loss": 1.368117094039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6441535949707031, "epoch": 5.19, "learning_rate": 2.671174978867287e-05, "loss": 1.0386, "step": 6142, "task_loss": 0.2580545246601105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.018692135810852, "epoch": 5.19, "learning_rate": 2.6707053630130553e-05, "loss": 1.1155, "step": 6143, "task_loss": 0.8842845559120178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3950591087341309, "epoch": 5.19, "learning_rate": 2.6702357471588242e-05, "loss": 1.0675, "step": 6144, "task_loss": 0.6455617547035217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4027459621429443, "epoch": 5.19, "learning_rate": 2.669766131304593e-05, "loss": 1.1597, "step": 6145, "task_loss": 1.290284514427185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2498403787612915, "epoch": 5.2, "learning_rate": 2.669296515450362e-05, "loss": 1.1737, "step": 6146, "task_loss": 1.2441987991333008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.223310112953186, "epoch": 5.2, "learning_rate": 2.6688268995961308e-05, "loss": 1.035, "step": 6147, "task_loss": 0.7693168520927429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8752123713493347, "epoch": 5.2, "learning_rate": 2.668357283741899e-05, "loss": 1.0702, "step": 6148, "task_loss": 1.469398856163025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0768365859985352, "epoch": 5.2, "learning_rate": 2.667887667887668e-05, "loss": 1.0919, "step": 6149, "task_loss": 0.42779234051704407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.434818148612976, "epoch": 5.2, "learning_rate": 2.6674180520334367e-05, "loss": 1.149, "step": 6150, "task_loss": 1.4588658809661865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0490760803222656, "epoch": 5.2, "learning_rate": 2.6669484361792057e-05, "loss": 1.1706, "step": 6151, "task_loss": 1.0135444402694702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2338491678237915, "epoch": 5.2, "learning_rate": 2.666478820324974e-05, "loss": 1.19, "step": 6152, "task_loss": 0.5657461285591125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3398277759552002, "epoch": 5.2, "learning_rate": 2.666009204470743e-05, "loss": 1.0644, "step": 6153, "task_loss": 1.8201038837432861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.218705177307129, "epoch": 5.2, "learning_rate": 2.665539588616512e-05, "loss": 1.1226, "step": 6154, "task_loss": 0.7776895761489868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.111227035522461, "epoch": 5.2, "learning_rate": 2.665069972762281e-05, "loss": 1.2408, "step": 6155, "task_loss": 1.9748846292495728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2367753982543945, "epoch": 5.2, "learning_rate": 2.6646003569080492e-05, "loss": 1.1826, "step": 6156, "task_loss": 1.3912829160690308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2793141603469849, "epoch": 5.2, "learning_rate": 2.6641307410538178e-05, "loss": 1.07, "step": 6157, "task_loss": 1.2290019989013672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.868612289428711, "epoch": 5.21, "learning_rate": 2.6636611251995868e-05, "loss": 1.2504, "step": 6158, "task_loss": 1.838861346244812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8464734554290771, "epoch": 5.21, "learning_rate": 2.6631915093453558e-05, "loss": 1.1652, "step": 6159, "task_loss": 1.3350145816802979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9518826007843018, "epoch": 5.21, "learning_rate": 2.6627218934911247e-05, "loss": 1.0043, "step": 6160, "task_loss": 0.9751276969909668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6352512836456299, "epoch": 5.21, "learning_rate": 2.662252277636893e-05, "loss": 1.6292, "step": 6161, "task_loss": 1.4299252033233643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4392664432525635, "epoch": 5.21, "learning_rate": 2.661782661782662e-05, "loss": 1.1024, "step": 6162, "task_loss": 1.642331600189209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6012628078460693, "epoch": 5.21, "learning_rate": 2.6613130459284306e-05, "loss": 1.1943, "step": 6163, "task_loss": 1.3486820459365845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9602446556091309, "epoch": 5.21, "learning_rate": 2.6608434300741996e-05, "loss": 1.0153, "step": 6164, "task_loss": 0.917073130607605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0401842594146729, "epoch": 5.21, "learning_rate": 2.660373814219968e-05, "loss": 1.0585, "step": 6165, "task_loss": 1.0297956466674805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9738554954528809, "epoch": 5.21, "learning_rate": 2.659904198365737e-05, "loss": 0.8609, "step": 6166, "task_loss": 0.9601091146469116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1269104480743408, "epoch": 5.21, "learning_rate": 2.659434582511506e-05, "loss": 1.3513, "step": 6167, "task_loss": 0.7471522092819214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0056819915771484, "epoch": 5.21, "learning_rate": 2.658964966657275e-05, "loss": 0.9095, "step": 6168, "task_loss": 1.0109232664108276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.027864933013916, "epoch": 5.21, "learning_rate": 2.658495350803043e-05, "loss": 1.0625, "step": 6169, "task_loss": 0.7176570296287537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7271490097045898, "epoch": 5.22, "learning_rate": 2.6580257349488118e-05, "loss": 0.8865, "step": 6170, "task_loss": 0.7121618986129761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2054342031478882, "epoch": 5.22, "learning_rate": 2.6575561190945807e-05, "loss": 1.151, "step": 6171, "task_loss": 1.3140528202056885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7510290145874023, "epoch": 5.22, "learning_rate": 2.6570865032403497e-05, "loss": 0.9813, "step": 6172, "task_loss": 0.7346899509429932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9630309343338013, "epoch": 5.22, "learning_rate": 2.656616887386118e-05, "loss": 1.3277, "step": 6173, "task_loss": 0.6570942997932434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.686715304851532, "epoch": 5.22, "learning_rate": 2.656147271531887e-05, "loss": 1.0309, "step": 6174, "task_loss": 0.8296456933021545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3309473991394043, "epoch": 5.22, "learning_rate": 2.655677655677656e-05, "loss": 1.0314, "step": 6175, "task_loss": 0.8451323509216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.787672221660614, "epoch": 5.22, "learning_rate": 2.6552080398234246e-05, "loss": 1.2032, "step": 6176, "task_loss": 0.6425647139549255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9583227634429932, "epoch": 5.22, "learning_rate": 2.6547384239691936e-05, "loss": 1.2364, "step": 6177, "task_loss": 1.2245876789093018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.144657015800476, "epoch": 5.22, "learning_rate": 2.654268808114962e-05, "loss": 0.8359, "step": 6178, "task_loss": 1.091559886932373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3657872676849365, "epoch": 5.22, "learning_rate": 2.6537991922607308e-05, "loss": 1.183, "step": 6179, "task_loss": 0.7540902495384216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6755839586257935, "epoch": 5.22, "learning_rate": 2.6533295764064998e-05, "loss": 0.9059, "step": 6180, "task_loss": 0.2529342770576477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8517100214958191, "epoch": 5.22, "learning_rate": 2.6528599605522688e-05, "loss": 1.1283, "step": 6181, "task_loss": 0.6168472766876221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9510842561721802, "epoch": 5.23, "learning_rate": 2.652390344698037e-05, "loss": 0.9613, "step": 6182, "task_loss": 0.8888170123100281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2120258808135986, "epoch": 5.23, "learning_rate": 2.6519207288438057e-05, "loss": 1.1663, "step": 6183, "task_loss": 1.944800615310669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1889103651046753, "epoch": 5.23, "learning_rate": 2.6514511129895747e-05, "loss": 0.9729, "step": 6184, "task_loss": 0.4640834331512451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7561454772949219, "epoch": 5.23, "learning_rate": 2.6509814971353436e-05, "loss": 1.1612, "step": 6185, "task_loss": 1.369301676750183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1748135089874268, "epoch": 5.23, "learning_rate": 2.650511881281112e-05, "loss": 1.0979, "step": 6186, "task_loss": 1.042407751083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9804651737213135, "epoch": 5.23, "learning_rate": 2.650042265426881e-05, "loss": 1.4657, "step": 6187, "task_loss": 0.7802192568778992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1438546180725098, "epoch": 5.23, "learning_rate": 2.64957264957265e-05, "loss": 1.3309, "step": 6188, "task_loss": 0.8047996759414673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.146437168121338, "epoch": 5.23, "learning_rate": 2.6491030337184185e-05, "loss": 1.2093, "step": 6189, "task_loss": 1.1828802824020386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8057373762130737, "epoch": 5.23, "learning_rate": 2.6486334178641868e-05, "loss": 0.9812, "step": 6190, "task_loss": 0.4868757724761963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9631660580635071, "epoch": 5.23, "learning_rate": 2.6481638020099558e-05, "loss": 0.8816, "step": 6191, "task_loss": 1.311761498451233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.031169056892395, "epoch": 5.23, "learning_rate": 2.6476941861557248e-05, "loss": 1.3238, "step": 6192, "task_loss": 0.5354303121566772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.475649118423462, "epoch": 5.23, "learning_rate": 2.6472245703014937e-05, "loss": 1.2112, "step": 6193, "task_loss": 1.3338350057601929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.974225640296936, "epoch": 5.24, "learning_rate": 2.6467549544472624e-05, "loss": 1.5624, "step": 6194, "task_loss": 0.8312191963195801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.287976861000061, "epoch": 5.24, "learning_rate": 2.646285338593031e-05, "loss": 1.2337, "step": 6195, "task_loss": 0.9710474610328674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4781532287597656, "epoch": 5.24, "learning_rate": 2.6458157227387996e-05, "loss": 1.0449, "step": 6196, "task_loss": 2.035592555999756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3114919662475586, "epoch": 5.24, "learning_rate": 2.6453461068845686e-05, "loss": 1.2518, "step": 6197, "task_loss": 1.028786301612854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3795984983444214, "epoch": 5.24, "learning_rate": 2.6448764910303376e-05, "loss": 1.1316, "step": 6198, "task_loss": 0.6978143453598022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9298442602157593, "epoch": 5.24, "learning_rate": 2.644406875176106e-05, "loss": 1.0811, "step": 6199, "task_loss": 0.5269842743873596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9821118712425232, "epoch": 5.24, "learning_rate": 2.643937259321875e-05, "loss": 1.4327, "step": 6200, "task_loss": 1.1459236145019531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7825688719749451, "epoch": 5.24, "learning_rate": 2.6434676434676435e-05, "loss": 1.3468, "step": 6201, "task_loss": 0.2694069445133209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.241217851638794, "epoch": 5.24, "learning_rate": 2.6429980276134125e-05, "loss": 0.929, "step": 6202, "task_loss": 1.812342882156372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9486041069030762, "epoch": 5.24, "learning_rate": 2.6425284117591807e-05, "loss": 0.9568, "step": 6203, "task_loss": 0.8443683981895447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5917683839797974, "epoch": 5.24, "learning_rate": 2.6420587959049497e-05, "loss": 1.0144, "step": 6204, "task_loss": 0.5462884306907654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6338581442832947, "epoch": 5.24, "learning_rate": 2.6415891800507187e-05, "loss": 1.0995, "step": 6205, "task_loss": 0.30089494585990906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1363401412963867, "epoch": 5.25, "learning_rate": 2.6411195641964877e-05, "loss": 1.015, "step": 6206, "task_loss": 1.0378296375274658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.860331654548645, "epoch": 5.25, "learning_rate": 2.6406499483422563e-05, "loss": 1.156, "step": 6207, "task_loss": 0.9441978335380554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8174499273300171, "epoch": 5.25, "learning_rate": 2.640180332488025e-05, "loss": 0.8746, "step": 6208, "task_loss": 0.9207675457000732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9043317437171936, "epoch": 5.25, "learning_rate": 2.6397107166337936e-05, "loss": 1.2023, "step": 6209, "task_loss": 0.7007384896278381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1051671504974365, "epoch": 5.25, "learning_rate": 2.6392411007795625e-05, "loss": 1.1056, "step": 6210, "task_loss": 1.8094149827957153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.486122488975525, "epoch": 5.25, "learning_rate": 2.6387714849253315e-05, "loss": 1.2895, "step": 6211, "task_loss": 0.9646593928337097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.541872262954712, "epoch": 5.25, "learning_rate": 2.6383018690710998e-05, "loss": 1.4163, "step": 6212, "task_loss": 1.447293996810913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0304516553878784, "epoch": 5.25, "learning_rate": 2.6378322532168688e-05, "loss": 1.1386, "step": 6213, "task_loss": 0.4111465811729431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6031378507614136, "epoch": 5.25, "learning_rate": 2.6373626373626374e-05, "loss": 1.063, "step": 6214, "task_loss": 1.8954471349716187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2984228134155273, "epoch": 5.25, "learning_rate": 2.6368930215084064e-05, "loss": 1.256, "step": 6215, "task_loss": 2.243769407272339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.801537275314331, "epoch": 5.25, "learning_rate": 2.6364234056541747e-05, "loss": 1.2431, "step": 6216, "task_loss": 1.2497084140777588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2448657751083374, "epoch": 5.26, "learning_rate": 2.6359537897999437e-05, "loss": 1.1012, "step": 6217, "task_loss": 0.5986504554748535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.681397557258606, "epoch": 5.26, "learning_rate": 2.6354841739457126e-05, "loss": 1.027, "step": 6218, "task_loss": 0.4993392527103424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.850381076335907, "epoch": 5.26, "learning_rate": 2.6350145580914816e-05, "loss": 1.0346, "step": 6219, "task_loss": 0.41307950019836426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8033324480056763, "epoch": 5.26, "learning_rate": 2.63454494223725e-05, "loss": 1.4281, "step": 6220, "task_loss": 2.3124778270721436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4844787120819092, "epoch": 5.26, "learning_rate": 2.6340753263830185e-05, "loss": 1.1708, "step": 6221, "task_loss": 0.4356376528739929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6397576332092285, "epoch": 5.26, "learning_rate": 2.6336057105287875e-05, "loss": 1.2906, "step": 6222, "task_loss": 0.8729392886161804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.017539620399475, "epoch": 5.26, "learning_rate": 2.6331360946745565e-05, "loss": 0.9452, "step": 6223, "task_loss": 0.5469521284103394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3425133228302002, "epoch": 5.26, "learning_rate": 2.6326664788203254e-05, "loss": 1.2875, "step": 6224, "task_loss": 0.6742210388183594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7143797874450684, "epoch": 5.26, "learning_rate": 2.6321968629660937e-05, "loss": 1.2677, "step": 6225, "task_loss": 1.6483148336410522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.193984866142273, "epoch": 5.26, "learning_rate": 2.6317272471118627e-05, "loss": 0.9447, "step": 6226, "task_loss": 0.41931700706481934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5056847929954529, "epoch": 5.26, "learning_rate": 2.6312576312576314e-05, "loss": 0.9832, "step": 6227, "task_loss": 0.2793525159358978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3663452863693237, "epoch": 5.26, "learning_rate": 2.6307880154034003e-05, "loss": 1.2272, "step": 6228, "task_loss": 2.338336944580078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0584090948104858, "epoch": 5.27, "learning_rate": 2.6303183995491686e-05, "loss": 1.2959, "step": 6229, "task_loss": 0.785457968711853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4544163942337036, "epoch": 5.27, "learning_rate": 2.6298487836949376e-05, "loss": 1.586, "step": 6230, "task_loss": 0.9487395882606506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3515846729278564, "epoch": 5.27, "learning_rate": 2.6293791678407066e-05, "loss": 1.0843, "step": 6231, "task_loss": 1.3999592065811157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8455422520637512, "epoch": 5.27, "learning_rate": 2.6289095519864755e-05, "loss": 1.0641, "step": 6232, "task_loss": 0.47983208298683167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0652663707733154, "epoch": 5.27, "learning_rate": 2.628439936132244e-05, "loss": 1.1916, "step": 6233, "task_loss": 1.5522903203964233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9428818821907043, "epoch": 5.27, "learning_rate": 2.6279703202780125e-05, "loss": 1.1266, "step": 6234, "task_loss": 1.3327751159667969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7949761748313904, "epoch": 5.27, "learning_rate": 2.6275007044237814e-05, "loss": 0.9778, "step": 6235, "task_loss": 0.8693958520889282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.201695442199707, "epoch": 5.27, "learning_rate": 2.6270310885695504e-05, "loss": 1.2017, "step": 6236, "task_loss": 0.6938903331756592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9175559282302856, "epoch": 5.27, "learning_rate": 2.6265614727153194e-05, "loss": 1.1233, "step": 6237, "task_loss": 0.592276930809021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.899194598197937, "epoch": 5.27, "learning_rate": 2.6260918568610877e-05, "loss": 1.229, "step": 6238, "task_loss": 1.6090184450149536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2717074155807495, "epoch": 5.27, "learning_rate": 2.6256222410068567e-05, "loss": 0.8681, "step": 6239, "task_loss": 0.6086977124214172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4519665241241455, "epoch": 5.27, "learning_rate": 2.6251526251526253e-05, "loss": 1.0917, "step": 6240, "task_loss": 1.793878197669983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.254517912864685, "epoch": 5.28, "learning_rate": 2.6246830092983943e-05, "loss": 0.9061, "step": 6241, "task_loss": 2.774243116378784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7925705909729004, "epoch": 5.28, "learning_rate": 2.6242133934441626e-05, "loss": 0.8723, "step": 6242, "task_loss": 1.40912663936615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3583993911743164, "epoch": 5.28, "learning_rate": 2.6237437775899315e-05, "loss": 1.1732, "step": 6243, "task_loss": 2.383763551712036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8990887403488159, "epoch": 5.28, "learning_rate": 2.6232741617357005e-05, "loss": 1.219, "step": 6244, "task_loss": 1.0246232748031616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2570874691009521, "epoch": 5.28, "learning_rate": 2.622804545881469e-05, "loss": 1.222, "step": 6245, "task_loss": 1.6524068117141724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1548901796340942, "epoch": 5.28, "learning_rate": 2.6223349300272378e-05, "loss": 1.1277, "step": 6246, "task_loss": 1.3527284860610962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9121268391609192, "epoch": 5.28, "learning_rate": 2.6218653141730064e-05, "loss": 0.7709, "step": 6247, "task_loss": 0.8280448317527771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.768131971359253, "epoch": 5.28, "learning_rate": 2.6213956983187754e-05, "loss": 1.0661, "step": 6248, "task_loss": 0.9158995747566223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9742251634597778, "epoch": 5.28, "learning_rate": 2.6209260824645443e-05, "loss": 0.8684, "step": 6249, "task_loss": 1.9112627506256104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1385395526885986, "epoch": 5.28, "learning_rate": 2.6204564666103126e-05, "loss": 0.9587, "step": 6250, "task_loss": 1.0635969638824463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7382253408432007, "epoch": 5.28, "learning_rate": 2.6199868507560816e-05, "loss": 1.0789, "step": 6251, "task_loss": 1.2995070219039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3435275554656982, "epoch": 5.28, "learning_rate": 2.6195172349018502e-05, "loss": 1.3285, "step": 6252, "task_loss": 1.3958388566970825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8839508295059204, "epoch": 5.29, "learning_rate": 2.6190476190476192e-05, "loss": 0.9871, "step": 6253, "task_loss": 0.3241124749183655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8734854459762573, "epoch": 5.29, "learning_rate": 2.6185780031933882e-05, "loss": 1.0681, "step": 6254, "task_loss": 0.22743824124336243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.331413745880127, "epoch": 5.29, "learning_rate": 2.6181083873391565e-05, "loss": 1.1238, "step": 6255, "task_loss": 0.796747088432312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9412879943847656, "epoch": 5.29, "learning_rate": 2.6176387714849255e-05, "loss": 1.0136, "step": 6256, "task_loss": 1.0938544273376465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9505418539047241, "epoch": 5.29, "learning_rate": 2.6171691556306944e-05, "loss": 1.0787, "step": 6257, "task_loss": 0.5871162414550781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2588036060333252, "epoch": 5.29, "learning_rate": 2.616699539776463e-05, "loss": 1.2351, "step": 6258, "task_loss": 1.7623050212860107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.70367431640625, "epoch": 5.29, "learning_rate": 2.6162299239222317e-05, "loss": 1.1211, "step": 6259, "task_loss": 0.3016774356365204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8273639678955078, "epoch": 5.29, "learning_rate": 2.6157603080680003e-05, "loss": 1.2108, "step": 6260, "task_loss": 1.1877806186676025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5665922164916992, "epoch": 5.29, "learning_rate": 2.6152906922137693e-05, "loss": 1.0476, "step": 6261, "task_loss": 1.5153690576553345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.445451021194458, "epoch": 5.29, "learning_rate": 2.6148210763595383e-05, "loss": 1.106, "step": 6262, "task_loss": 0.9027324318885803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1226626634597778, "epoch": 5.29, "learning_rate": 2.6143514605053066e-05, "loss": 1.0226, "step": 6263, "task_loss": 0.46493613719940186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0297969579696655, "epoch": 5.29, "learning_rate": 2.6138818446510756e-05, "loss": 1.0033, "step": 6264, "task_loss": 1.0591336488723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9917058348655701, "epoch": 5.3, "learning_rate": 2.6134122287968442e-05, "loss": 1.0528, "step": 6265, "task_loss": 0.7732657194137573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7223975658416748, "epoch": 5.3, "learning_rate": 2.612942612942613e-05, "loss": 1.0636, "step": 6266, "task_loss": 0.7968534231185913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0842972993850708, "epoch": 5.3, "learning_rate": 2.6124729970883815e-05, "loss": 1.2212, "step": 6267, "task_loss": 0.6844432353973389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.123986005783081, "epoch": 5.3, "learning_rate": 2.6120033812341504e-05, "loss": 1.1249, "step": 6268, "task_loss": 0.8305672407150269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3449479341506958, "epoch": 5.3, "learning_rate": 2.6115337653799194e-05, "loss": 1.152, "step": 6269, "task_loss": 1.194555640220642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3965964317321777, "epoch": 5.3, "learning_rate": 2.6110641495256884e-05, "loss": 1.2677, "step": 6270, "task_loss": 0.4452875554561615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6390430927276611, "epoch": 5.3, "learning_rate": 2.610594533671457e-05, "loss": 1.0371, "step": 6271, "task_loss": 0.5949299931526184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1187058687210083, "epoch": 5.3, "learning_rate": 2.6101249178172253e-05, "loss": 1.2322, "step": 6272, "task_loss": 0.9060068726539612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6408742070198059, "epoch": 5.3, "learning_rate": 2.6096553019629943e-05, "loss": 1.0286, "step": 6273, "task_loss": 0.9455421566963196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8955656290054321, "epoch": 5.3, "learning_rate": 2.6091856861087632e-05, "loss": 0.8037, "step": 6274, "task_loss": 1.1077662706375122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9491336345672607, "epoch": 5.3, "learning_rate": 2.6087160702545322e-05, "loss": 1.1035, "step": 6275, "task_loss": 0.24338403344154358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0652153491973877, "epoch": 5.3, "learning_rate": 2.6082464544003005e-05, "loss": 1.0501, "step": 6276, "task_loss": 0.7609806060791016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.441405177116394, "epoch": 5.31, "learning_rate": 2.6077768385460695e-05, "loss": 1.0139, "step": 6277, "task_loss": 1.4520909786224365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6695295572280884, "epoch": 5.31, "learning_rate": 2.607307222691838e-05, "loss": 0.9807, "step": 6278, "task_loss": 0.3223990797996521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7699382305145264, "epoch": 5.31, "learning_rate": 2.606837606837607e-05, "loss": 1.0375, "step": 6279, "task_loss": 0.5425816178321838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2732877731323242, "epoch": 5.31, "learning_rate": 2.6063679909833754e-05, "loss": 1.0921, "step": 6280, "task_loss": 1.7239094972610474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.9920345544815063, "epoch": 5.31, "learning_rate": 2.6058983751291444e-05, "loss": 1.1065, "step": 6281, "task_loss": 1.9409791231155396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.619513988494873, "epoch": 5.31, "learning_rate": 2.6054287592749133e-05, "loss": 1.0397, "step": 6282, "task_loss": 0.5166852474212646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9427179098129272, "epoch": 5.31, "learning_rate": 2.6049591434206823e-05, "loss": 0.9521, "step": 6283, "task_loss": 0.6978086233139038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7561464309692383, "epoch": 5.31, "learning_rate": 2.604489527566451e-05, "loss": 1.0592, "step": 6284, "task_loss": 1.0647493600845337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6656410694122314, "epoch": 5.31, "learning_rate": 2.6040199117122192e-05, "loss": 1.1173, "step": 6285, "task_loss": 1.7194557189941406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6584614515304565, "epoch": 5.31, "learning_rate": 2.6035502958579882e-05, "loss": 1.1789, "step": 6286, "task_loss": 0.5800005197525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.195455551147461, "epoch": 5.31, "learning_rate": 2.6030806800037572e-05, "loss": 1.0972, "step": 6287, "task_loss": 0.9496713280677795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3013471364974976, "epoch": 5.32, "learning_rate": 2.602611064149526e-05, "loss": 1.0459, "step": 6288, "task_loss": 1.087057113647461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6977522373199463, "epoch": 5.32, "learning_rate": 2.6021414482952944e-05, "loss": 0.8468, "step": 6289, "task_loss": 0.8506136536598206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3524396419525146, "epoch": 5.32, "learning_rate": 2.6016718324410634e-05, "loss": 1.3955, "step": 6290, "task_loss": 1.2980319261550903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.36118745803833, "epoch": 5.32, "learning_rate": 2.601202216586832e-05, "loss": 1.233, "step": 6291, "task_loss": 1.276010513305664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0174909830093384, "epoch": 5.32, "learning_rate": 2.600732600732601e-05, "loss": 1.1191, "step": 6292, "task_loss": 1.8033478260040283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8597110509872437, "epoch": 5.32, "learning_rate": 2.6002629848783693e-05, "loss": 0.922, "step": 6293, "task_loss": 0.8075927495956421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8669419288635254, "epoch": 5.32, "learning_rate": 2.5997933690241383e-05, "loss": 1.3675, "step": 6294, "task_loss": 0.7990806102752686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.87864089012146, "epoch": 5.32, "learning_rate": 2.5993237531699073e-05, "loss": 0.9739, "step": 6295, "task_loss": 0.43987804651260376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8134065866470337, "epoch": 5.32, "learning_rate": 2.598854137315676e-05, "loss": 1.0508, "step": 6296, "task_loss": 1.5736198425292969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4726873636245728, "epoch": 5.32, "learning_rate": 2.5983845214614445e-05, "loss": 1.1885, "step": 6297, "task_loss": 1.0445903539657593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0050188302993774, "epoch": 5.32, "learning_rate": 2.5979149056072132e-05, "loss": 1.1799, "step": 6298, "task_loss": 0.37301650643348694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.4106686115264893, "epoch": 5.32, "learning_rate": 2.597445289752982e-05, "loss": 1.4363, "step": 6299, "task_loss": 2.0497448444366455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1630662679672241, "epoch": 5.33, "learning_rate": 2.596975673898751e-05, "loss": 1.0558, "step": 6300, "task_loss": 1.0007015466690063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9100073575973511, "epoch": 5.33, "learning_rate": 2.59650605804452e-05, "loss": 1.0316, "step": 6301, "task_loss": 2.01212215423584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0581719875335693, "epoch": 5.33, "learning_rate": 2.5960364421902884e-05, "loss": 1.1059, "step": 6302, "task_loss": 1.0291122198104858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8510638475418091, "epoch": 5.33, "learning_rate": 2.5955668263360574e-05, "loss": 0.9826, "step": 6303, "task_loss": 1.2507096529006958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8330592513084412, "epoch": 5.33, "learning_rate": 2.595097210481826e-05, "loss": 1.0844, "step": 6304, "task_loss": 1.2770845890045166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4832602739334106, "epoch": 5.33, "learning_rate": 2.594627594627595e-05, "loss": 1.2875, "step": 6305, "task_loss": 1.8621783256530762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3077850341796875, "epoch": 5.33, "learning_rate": 2.5941579787733633e-05, "loss": 1.1176, "step": 6306, "task_loss": 0.9065914154052734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.709789514541626, "epoch": 5.33, "learning_rate": 2.5936883629191322e-05, "loss": 1.0266, "step": 6307, "task_loss": 0.5881479978561401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8498868942260742, "epoch": 5.33, "learning_rate": 2.5932187470649012e-05, "loss": 1.1565, "step": 6308, "task_loss": 0.9193160533905029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5340443849563599, "epoch": 5.33, "learning_rate": 2.59274913121067e-05, "loss": 1.0437, "step": 6309, "task_loss": 1.0779930353164673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4951860308647156, "epoch": 5.33, "learning_rate": 2.5922795153564385e-05, "loss": 1.0682, "step": 6310, "task_loss": 0.062482595443725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6462719440460205, "epoch": 5.33, "learning_rate": 2.591809899502207e-05, "loss": 1.3269, "step": 6311, "task_loss": 0.9390131235122681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.680031955242157, "epoch": 5.34, "learning_rate": 2.591340283647976e-05, "loss": 0.844, "step": 6312, "task_loss": 0.199723482131958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.7682833671569824, "epoch": 5.34, "learning_rate": 2.590870667793745e-05, "loss": 1.1462, "step": 6313, "task_loss": 1.7733063697814941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2445093393325806, "epoch": 5.34, "learning_rate": 2.590401051939514e-05, "loss": 1.1335, "step": 6314, "task_loss": 0.4453241229057312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0617992877960205, "epoch": 5.34, "learning_rate": 2.5899314360852823e-05, "loss": 1.0148, "step": 6315, "task_loss": 1.309937596321106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0602420568466187, "epoch": 5.34, "learning_rate": 2.589461820231051e-05, "loss": 0.9705, "step": 6316, "task_loss": 0.4154098629951477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.8183212280273438, "epoch": 5.34, "learning_rate": 2.58899220437682e-05, "loss": 1.2483, "step": 6317, "task_loss": 1.4478070735931396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5306532382965088, "epoch": 5.34, "learning_rate": 2.588522588522589e-05, "loss": 1.008, "step": 6318, "task_loss": 1.375838279724121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.448251724243164, "epoch": 5.34, "learning_rate": 2.5880529726683572e-05, "loss": 1.1819, "step": 6319, "task_loss": 1.8589439392089844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9067153930664062, "epoch": 5.34, "learning_rate": 2.587583356814126e-05, "loss": 0.7815, "step": 6320, "task_loss": 1.3621230125427246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.251185417175293, "epoch": 5.34, "learning_rate": 2.587113740959895e-05, "loss": 0.9871, "step": 6321, "task_loss": 0.5612819790840149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0040099620819092, "epoch": 5.34, "learning_rate": 2.5866441251056638e-05, "loss": 0.9343, "step": 6322, "task_loss": 0.7605289220809937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7880829572677612, "epoch": 5.34, "learning_rate": 2.586174509251432e-05, "loss": 0.8132, "step": 6323, "task_loss": 0.41629576683044434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6484884023666382, "epoch": 5.35, "learning_rate": 2.585704893397201e-05, "loss": 0.737, "step": 6324, "task_loss": 0.8958199620246887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.303560495376587, "epoch": 5.35, "learning_rate": 2.58523527754297e-05, "loss": 0.9707, "step": 6325, "task_loss": 1.4539903402328491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9981593489646912, "epoch": 5.35, "learning_rate": 2.584765661688739e-05, "loss": 0.827, "step": 6326, "task_loss": 0.6826077103614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0499603748321533, "epoch": 5.35, "learning_rate": 2.5842960458345073e-05, "loss": 1.0396, "step": 6327, "task_loss": 0.963263213634491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1476118564605713, "epoch": 5.35, "learning_rate": 2.5838264299802763e-05, "loss": 1.0969, "step": 6328, "task_loss": 0.84221351146698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1403520107269287, "epoch": 5.35, "learning_rate": 2.583356814126045e-05, "loss": 1.1707, "step": 6329, "task_loss": 1.1724507808685303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1252774000167847, "epoch": 5.35, "learning_rate": 2.582887198271814e-05, "loss": 1.0681, "step": 6330, "task_loss": 1.868648886680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2963383197784424, "epoch": 5.35, "learning_rate": 2.582417582417583e-05, "loss": 1.2728, "step": 6331, "task_loss": 1.4471676349639893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6493160724639893, "epoch": 5.35, "learning_rate": 2.581947966563351e-05, "loss": 0.6949, "step": 6332, "task_loss": 0.8132178783416748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9266147017478943, "epoch": 5.35, "learning_rate": 2.58147835070912e-05, "loss": 0.7988, "step": 6333, "task_loss": 0.6618081331253052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6851540803909302, "epoch": 5.35, "learning_rate": 2.581008734854889e-05, "loss": 0.9672, "step": 6334, "task_loss": 0.660747766494751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3590552806854248, "epoch": 5.35, "learning_rate": 2.5805391190006577e-05, "loss": 1.0122, "step": 6335, "task_loss": 1.0245919227600098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9396985769271851, "epoch": 5.36, "learning_rate": 2.580069503146426e-05, "loss": 1.0646, "step": 6336, "task_loss": 0.6915807127952576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.638030767440796, "epoch": 5.36, "learning_rate": 2.579599887292195e-05, "loss": 1.1912, "step": 6337, "task_loss": 0.9771235585212708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7163246870040894, "epoch": 5.36, "learning_rate": 2.579130271437964e-05, "loss": 0.9379, "step": 6338, "task_loss": 0.6869356632232666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.194818377494812, "epoch": 5.36, "learning_rate": 2.578660655583733e-05, "loss": 1.0507, "step": 6339, "task_loss": 1.6643877029418945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.262122392654419, "epoch": 5.36, "learning_rate": 2.5781910397295012e-05, "loss": 1.0544, "step": 6340, "task_loss": 0.6126989722251892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0057233572006226, "epoch": 5.36, "learning_rate": 2.5777214238752702e-05, "loss": 0.8725, "step": 6341, "task_loss": 1.3290799856185913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0772204399108887, "epoch": 5.36, "learning_rate": 2.5772518080210388e-05, "loss": 1.0373, "step": 6342, "task_loss": 1.0613386631011963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6003808975219727, "epoch": 5.36, "learning_rate": 2.5767821921668078e-05, "loss": 0.8616, "step": 6343, "task_loss": 0.5381437540054321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7403531670570374, "epoch": 5.36, "learning_rate": 2.576312576312576e-05, "loss": 1.0802, "step": 6344, "task_loss": 1.2688593864440918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7613866329193115, "epoch": 5.36, "learning_rate": 2.575842960458345e-05, "loss": 0.9305, "step": 6345, "task_loss": 0.45049619674682617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8639078140258789, "epoch": 5.36, "learning_rate": 2.575373344604114e-05, "loss": 0.773, "step": 6346, "task_loss": 0.7917002439498901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1100819110870361, "epoch": 5.36, "learning_rate": 2.5749037287498827e-05, "loss": 1.1782, "step": 6347, "task_loss": 1.4262884855270386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3394521474838257, "epoch": 5.37, "learning_rate": 2.5744341128956516e-05, "loss": 1.1458, "step": 6348, "task_loss": 1.3590813875198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9739139676094055, "epoch": 5.37, "learning_rate": 2.57396449704142e-05, "loss": 0.9854, "step": 6349, "task_loss": 0.9969496130943298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.2864694595336914, "epoch": 5.37, "learning_rate": 2.573494881187189e-05, "loss": 1.4984, "step": 6350, "task_loss": 2.3490424156188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7360649108886719, "epoch": 5.37, "learning_rate": 2.573025265332958e-05, "loss": 0.8465, "step": 6351, "task_loss": 0.9907857775688171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2246911525726318, "epoch": 5.37, "learning_rate": 2.572555649478727e-05, "loss": 1.1069, "step": 6352, "task_loss": 1.2603459358215332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4171655178070068, "epoch": 5.37, "learning_rate": 2.572086033624495e-05, "loss": 0.9282, "step": 6353, "task_loss": 1.6089080572128296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.823884129524231, "epoch": 5.37, "learning_rate": 2.571616417770264e-05, "loss": 1.093, "step": 6354, "task_loss": 0.8978970050811768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3504915237426758, "epoch": 5.37, "learning_rate": 2.5711468019160328e-05, "loss": 1.3664, "step": 6355, "task_loss": 1.6159437894821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.007807970046997, "epoch": 5.37, "learning_rate": 2.5706771860618017e-05, "loss": 0.9996, "step": 6356, "task_loss": 0.8492662906646729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1904467344284058, "epoch": 5.37, "learning_rate": 2.57020757020757e-05, "loss": 1.0731, "step": 6357, "task_loss": 0.6660417318344116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.430095911026001, "epoch": 5.37, "learning_rate": 2.569737954353339e-05, "loss": 1.2733, "step": 6358, "task_loss": 1.299790620803833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.304652214050293, "epoch": 5.38, "learning_rate": 2.569268338499108e-05, "loss": 1.1705, "step": 6359, "task_loss": 1.7587486505508423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.124974250793457, "epoch": 5.38, "learning_rate": 2.5687987226448766e-05, "loss": 1.2375, "step": 6360, "task_loss": 2.455505847930908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2307713031768799, "epoch": 5.38, "learning_rate": 2.5683291067906456e-05, "loss": 1.1118, "step": 6361, "task_loss": 1.0427507162094116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0344038009643555, "epoch": 5.38, "learning_rate": 2.567859490936414e-05, "loss": 1.0311, "step": 6362, "task_loss": 1.0433926582336426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7467828989028931, "epoch": 5.38, "learning_rate": 2.567389875082183e-05, "loss": 0.9401, "step": 6363, "task_loss": 0.8314372897148132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.720356822013855, "epoch": 5.38, "learning_rate": 2.5669202592279518e-05, "loss": 1.1569, "step": 6364, "task_loss": 0.45317718386650085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9022893905639648, "epoch": 5.38, "learning_rate": 2.5664506433737208e-05, "loss": 0.8854, "step": 6365, "task_loss": 0.7513851523399353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.01951003074646, "epoch": 5.38, "learning_rate": 2.565981027519489e-05, "loss": 1.012, "step": 6366, "task_loss": 1.014028549194336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1145877838134766, "epoch": 5.38, "learning_rate": 2.5655114116652577e-05, "loss": 1.0325, "step": 6367, "task_loss": 1.5238088369369507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.706108570098877, "epoch": 5.38, "learning_rate": 2.5650417958110267e-05, "loss": 1.0829, "step": 6368, "task_loss": 0.3553515076637268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.243480920791626, "epoch": 5.38, "learning_rate": 2.5645721799567957e-05, "loss": 1.0387, "step": 6369, "task_loss": 0.9423811435699463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6028841137886047, "epoch": 5.38, "learning_rate": 2.564102564102564e-05, "loss": 0.7597, "step": 6370, "task_loss": 0.25798046588897705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3316113948822021, "epoch": 5.39, "learning_rate": 2.563632948248333e-05, "loss": 1.1278, "step": 6371, "task_loss": 1.5819897651672363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8314573764801025, "epoch": 5.39, "learning_rate": 2.563163332394102e-05, "loss": 0.9604, "step": 6372, "task_loss": 0.6811181902885437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2344552278518677, "epoch": 5.39, "learning_rate": 2.5626937165398705e-05, "loss": 0.8828, "step": 6373, "task_loss": 0.5046597719192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1312553882598877, "epoch": 5.39, "learning_rate": 2.562224100685639e-05, "loss": 1.2425, "step": 6374, "task_loss": 1.068778157234192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7105304598808289, "epoch": 5.39, "learning_rate": 2.5617544848314078e-05, "loss": 1.0198, "step": 6375, "task_loss": 0.6198731064796448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1761066913604736, "epoch": 5.39, "learning_rate": 2.5612848689771768e-05, "loss": 0.9758, "step": 6376, "task_loss": 0.8916518092155457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9055479168891907, "epoch": 5.39, "learning_rate": 2.5608152531229458e-05, "loss": 1.0546, "step": 6377, "task_loss": 0.6731428503990173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7797449827194214, "epoch": 5.39, "learning_rate": 2.5603456372687147e-05, "loss": 0.8961, "step": 6378, "task_loss": 0.5665112733840942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5086114406585693, "epoch": 5.39, "learning_rate": 2.559876021414483e-05, "loss": 1.1121, "step": 6379, "task_loss": 1.4896830320358276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.088930368423462, "epoch": 5.39, "learning_rate": 2.5594064055602517e-05, "loss": 1.1293, "step": 6380, "task_loss": 1.346992015838623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9938405752182007, "epoch": 5.39, "learning_rate": 2.5589367897060206e-05, "loss": 0.8446, "step": 6381, "task_loss": 1.0330746173858643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2944107055664062, "epoch": 5.39, "learning_rate": 2.5584671738517896e-05, "loss": 1.0868, "step": 6382, "task_loss": 0.6262892484664917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0077623128890991, "epoch": 5.4, "learning_rate": 2.557997557997558e-05, "loss": 0.9085, "step": 6383, "task_loss": 0.9175521731376648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0708560943603516, "epoch": 5.4, "learning_rate": 2.557527942143327e-05, "loss": 1.1007, "step": 6384, "task_loss": 1.7844183444976807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8474211692810059, "epoch": 5.4, "learning_rate": 2.557058326289096e-05, "loss": 1.1308, "step": 6385, "task_loss": 1.4928293228149414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0790715217590332, "epoch": 5.4, "learning_rate": 2.5565887104348645e-05, "loss": 0.9909, "step": 6386, "task_loss": 1.2272330522537231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9919785261154175, "epoch": 5.4, "learning_rate": 2.5561190945806328e-05, "loss": 1.2913, "step": 6387, "task_loss": 0.5895282030105591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0435731410980225, "epoch": 5.4, "learning_rate": 2.5556494787264017e-05, "loss": 1.0042, "step": 6388, "task_loss": 0.5504995584487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34119269251823425, "epoch": 5.4, "learning_rate": 2.5551798628721707e-05, "loss": 0.7726, "step": 6389, "task_loss": 0.034195199608802795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9045164585113525, "epoch": 5.4, "learning_rate": 2.5547102470179397e-05, "loss": 1.1253, "step": 6390, "task_loss": 1.174195647239685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9824955463409424, "epoch": 5.4, "learning_rate": 2.5542406311637083e-05, "loss": 1.2127, "step": 6391, "task_loss": 1.1945186853408813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1436433792114258, "epoch": 5.4, "learning_rate": 2.553771015309477e-05, "loss": 1.0124, "step": 6392, "task_loss": 0.71559077501297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9119744300842285, "epoch": 5.4, "learning_rate": 2.5533013994552456e-05, "loss": 1.0779, "step": 6393, "task_loss": 0.4001772403717041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9597569108009338, "epoch": 5.4, "learning_rate": 2.5528317836010146e-05, "loss": 1.2246, "step": 6394, "task_loss": 0.4977579414844513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0891964435577393, "epoch": 5.41, "learning_rate": 2.5523621677467835e-05, "loss": 0.8243, "step": 6395, "task_loss": 1.277949333190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.759095311164856, "epoch": 5.41, "learning_rate": 2.551892551892552e-05, "loss": 1.024, "step": 6396, "task_loss": 0.6564459800720215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1391981840133667, "epoch": 5.41, "learning_rate": 2.5514229360383208e-05, "loss": 1.2199, "step": 6397, "task_loss": 1.1850311756134033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4260303974151611, "epoch": 5.41, "learning_rate": 2.5509533201840898e-05, "loss": 1.2143, "step": 6398, "task_loss": 0.5350561141967773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1726813316345215, "epoch": 5.41, "learning_rate": 2.5504837043298584e-05, "loss": 1.344, "step": 6399, "task_loss": 1.8048787117004395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0538711547851562, "epoch": 5.41, "learning_rate": 2.5500140884756267e-05, "loss": 1.1065, "step": 6400, "task_loss": 1.3283641338348389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5445951223373413, "epoch": 5.41, "learning_rate": 2.5495444726213957e-05, "loss": 0.8748, "step": 6401, "task_loss": 1.0804758071899414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.61854887008667, "epoch": 5.41, "learning_rate": 2.5490748567671647e-05, "loss": 1.3068, "step": 6402, "task_loss": 1.3616797924041748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9377830028533936, "epoch": 5.41, "learning_rate": 2.5486052409129336e-05, "loss": 0.8869, "step": 6403, "task_loss": 1.418213963508606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9543104767799377, "epoch": 5.41, "learning_rate": 2.548135625058702e-05, "loss": 1.0794, "step": 6404, "task_loss": 0.9469397664070129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6023831963539124, "epoch": 5.41, "learning_rate": 2.547666009204471e-05, "loss": 0.9493, "step": 6405, "task_loss": 0.7260835766792297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8392258882522583, "epoch": 5.41, "learning_rate": 2.5471963933502395e-05, "loss": 0.8738, "step": 6406, "task_loss": 1.1259260177612305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6706261038780212, "epoch": 5.42, "learning_rate": 2.5467267774960085e-05, "loss": 0.7423, "step": 6407, "task_loss": 0.5052028894424438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6634154915809631, "epoch": 5.42, "learning_rate": 2.5462571616417775e-05, "loss": 0.7331, "step": 6408, "task_loss": 0.7302467226982117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8492001891136169, "epoch": 5.42, "learning_rate": 2.5457875457875458e-05, "loss": 0.9354, "step": 6409, "task_loss": 0.7666885256767273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9800273180007935, "epoch": 5.42, "learning_rate": 2.5453179299333147e-05, "loss": 0.8393, "step": 6410, "task_loss": 1.4598066806793213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9643076062202454, "epoch": 5.42, "learning_rate": 2.5448483140790834e-05, "loss": 0.9188, "step": 6411, "task_loss": 1.4695262908935547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1082658767700195, "epoch": 5.42, "learning_rate": 2.5443786982248524e-05, "loss": 1.2544, "step": 6412, "task_loss": 1.0364733934402466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7045761346817017, "epoch": 5.42, "learning_rate": 2.5439090823706206e-05, "loss": 0.7312, "step": 6413, "task_loss": 1.190801739692688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0994453430175781, "epoch": 5.42, "learning_rate": 2.5434394665163896e-05, "loss": 0.9568, "step": 6414, "task_loss": 1.973737359046936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9533572793006897, "epoch": 5.42, "learning_rate": 2.5429698506621586e-05, "loss": 0.8788, "step": 6415, "task_loss": 0.5982048511505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3776030540466309, "epoch": 5.42, "learning_rate": 2.5425002348079276e-05, "loss": 1.0218, "step": 6416, "task_loss": 1.6450928449630737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2623515129089355, "epoch": 5.42, "learning_rate": 2.542030618953696e-05, "loss": 1.3232, "step": 6417, "task_loss": 2.2095158100128174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0274525880813599, "epoch": 5.42, "learning_rate": 2.5415610030994645e-05, "loss": 0.8704, "step": 6418, "task_loss": 0.7443438768386841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1029473543167114, "epoch": 5.43, "learning_rate": 2.5410913872452335e-05, "loss": 1.1313, "step": 6419, "task_loss": 1.332040548324585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.088463544845581, "epoch": 5.43, "learning_rate": 2.5406217713910024e-05, "loss": 1.0743, "step": 6420, "task_loss": 0.561444103717804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5411878824234009, "epoch": 5.43, "learning_rate": 2.5401521555367707e-05, "loss": 0.7014, "step": 6421, "task_loss": 0.9577852487564087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9212995767593384, "epoch": 5.43, "learning_rate": 2.5396825396825397e-05, "loss": 0.9633, "step": 6422, "task_loss": 0.6173824071884155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1239933967590332, "epoch": 5.43, "learning_rate": 2.5392129238283087e-05, "loss": 0.9552, "step": 6423, "task_loss": 0.8767157196998596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5177076458930969, "epoch": 5.43, "learning_rate": 2.5387433079740773e-05, "loss": 0.9165, "step": 6424, "task_loss": 0.1797051876783371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7540852427482605, "epoch": 5.43, "learning_rate": 2.5382736921198463e-05, "loss": 0.6927, "step": 6425, "task_loss": 0.28528422117233276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9328884482383728, "epoch": 5.43, "learning_rate": 2.5378040762656146e-05, "loss": 0.8083, "step": 6426, "task_loss": 0.8624805212020874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4167513847351074, "epoch": 5.43, "learning_rate": 2.5373344604113836e-05, "loss": 1.1935, "step": 6427, "task_loss": 1.3988596200942993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8422014117240906, "epoch": 5.43, "learning_rate": 2.5368648445571525e-05, "loss": 0.7496, "step": 6428, "task_loss": 0.9037885665893555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7381453514099121, "epoch": 5.43, "learning_rate": 2.5363952287029215e-05, "loss": 1.0608, "step": 6429, "task_loss": 0.28821900486946106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6982793807983398, "epoch": 5.44, "learning_rate": 2.5359256128486898e-05, "loss": 0.732, "step": 6430, "task_loss": 0.18700656294822693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7847591042518616, "epoch": 5.44, "learning_rate": 2.5354559969944584e-05, "loss": 1.0533, "step": 6431, "task_loss": 0.30917078256607056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6557032465934753, "epoch": 5.44, "learning_rate": 2.5349863811402274e-05, "loss": 0.8674, "step": 6432, "task_loss": 0.2029854953289032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6246699690818787, "epoch": 5.44, "learning_rate": 2.5345167652859964e-05, "loss": 0.902, "step": 6433, "task_loss": 1.164105772972107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6681567430496216, "epoch": 5.44, "learning_rate": 2.5340471494317647e-05, "loss": 0.9871, "step": 6434, "task_loss": 0.8636514544487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2257847785949707, "epoch": 5.44, "learning_rate": 2.5335775335775336e-05, "loss": 1.0067, "step": 6435, "task_loss": 0.9083907604217529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1488745212554932, "epoch": 5.44, "learning_rate": 2.5331079177233026e-05, "loss": 0.7644, "step": 6436, "task_loss": 0.5656970739364624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6996373534202576, "epoch": 5.44, "learning_rate": 2.5326383018690712e-05, "loss": 1.0886, "step": 6437, "task_loss": 1.1709651947021484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9119250774383545, "epoch": 5.44, "learning_rate": 2.5321686860148402e-05, "loss": 0.7679, "step": 6438, "task_loss": 0.5768476724624634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5701866149902344, "epoch": 5.44, "learning_rate": 2.5316990701606085e-05, "loss": 0.7534, "step": 6439, "task_loss": 0.9829580783843994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.113213062286377, "epoch": 5.44, "learning_rate": 2.5312294543063775e-05, "loss": 1.041, "step": 6440, "task_loss": 1.6013315916061401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9960306882858276, "epoch": 5.44, "learning_rate": 2.5307598384521465e-05, "loss": 1.0015, "step": 6441, "task_loss": 1.284070611000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1212661266326904, "epoch": 5.45, "learning_rate": 2.530290222597915e-05, "loss": 1.0087, "step": 6442, "task_loss": 1.7662971019744873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7252159118652344, "epoch": 5.45, "learning_rate": 2.5298206067436837e-05, "loss": 0.7711, "step": 6443, "task_loss": 0.7913810610771179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.95545494556427, "epoch": 5.45, "learning_rate": 2.5293509908894524e-05, "loss": 0.9838, "step": 6444, "task_loss": 1.8931242227554321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.462641954421997, "epoch": 5.45, "learning_rate": 2.5288813750352213e-05, "loss": 1.3062, "step": 6445, "task_loss": 0.8831288814544678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.533979058265686, "epoch": 5.45, "learning_rate": 2.5284117591809903e-05, "loss": 1.3714, "step": 6446, "task_loss": 1.819564700126648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8309199213981628, "epoch": 5.45, "learning_rate": 2.5279421433267586e-05, "loss": 0.8695, "step": 6447, "task_loss": 0.538055956363678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0983004570007324, "epoch": 5.45, "learning_rate": 2.5274725274725276e-05, "loss": 0.9997, "step": 6448, "task_loss": 1.3669207096099854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.576421856880188, "epoch": 5.45, "learning_rate": 2.5270029116182966e-05, "loss": 1.1152, "step": 6449, "task_loss": 1.571720004081726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.1303563117980957, "epoch": 5.45, "learning_rate": 2.5265332957640652e-05, "loss": 1.268, "step": 6450, "task_loss": 1.037379503250122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1592273712158203, "epoch": 5.45, "learning_rate": 2.5260636799098335e-05, "loss": 0.8456, "step": 6451, "task_loss": 1.34870445728302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6026341319084167, "epoch": 5.45, "learning_rate": 2.5255940640556025e-05, "loss": 0.9743, "step": 6452, "task_loss": 0.4486827850341797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6393669247627258, "epoch": 5.45, "learning_rate": 2.5251244482013714e-05, "loss": 0.82, "step": 6453, "task_loss": 1.2138590812683105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1597423553466797, "epoch": 5.46, "learning_rate": 2.5246548323471404e-05, "loss": 1.2858, "step": 6454, "task_loss": 1.0601387023925781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8896673321723938, "epoch": 5.46, "learning_rate": 2.524185216492909e-05, "loss": 0.7924, "step": 6455, "task_loss": 0.5881742238998413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8204180002212524, "epoch": 5.46, "learning_rate": 2.5237156006386777e-05, "loss": 0.8208, "step": 6456, "task_loss": 0.9026874303817749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2995469570159912, "epoch": 5.46, "learning_rate": 2.5232459847844463e-05, "loss": 1.0202, "step": 6457, "task_loss": 0.6762723922729492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.028841495513916, "epoch": 5.46, "learning_rate": 2.5227763689302153e-05, "loss": 0.9643, "step": 6458, "task_loss": 0.5543161034584045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0200151205062866, "epoch": 5.46, "learning_rate": 2.5223067530759842e-05, "loss": 0.7978, "step": 6459, "task_loss": 0.12738095223903656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3459439277648926, "epoch": 5.46, "learning_rate": 2.5218371372217525e-05, "loss": 1.0965, "step": 6460, "task_loss": 1.6622364521026611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1270428895950317, "epoch": 5.46, "learning_rate": 2.5213675213675215e-05, "loss": 1.0596, "step": 6461, "task_loss": 2.115691900253296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8249296545982361, "epoch": 5.46, "learning_rate": 2.52089790551329e-05, "loss": 0.8567, "step": 6462, "task_loss": 0.7777090668678284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45985516905784607, "epoch": 5.46, "learning_rate": 2.520428289659059e-05, "loss": 0.9026, "step": 6463, "task_loss": 0.35789889097213745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0575484037399292, "epoch": 5.46, "learning_rate": 2.5199586738048274e-05, "loss": 0.8819, "step": 6464, "task_loss": 0.882922351360321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3270570039749146, "epoch": 5.46, "learning_rate": 2.5194890579505964e-05, "loss": 1.1282, "step": 6465, "task_loss": 1.563591480255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0714080333709717, "epoch": 5.47, "learning_rate": 2.5190194420963654e-05, "loss": 0.9017, "step": 6466, "task_loss": 0.6920506358146667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.130704402923584, "epoch": 5.47, "learning_rate": 2.5185498262421343e-05, "loss": 0.9198, "step": 6467, "task_loss": 1.9370007514953613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9204579591751099, "epoch": 5.47, "learning_rate": 2.518080210387903e-05, "loss": 0.9046, "step": 6468, "task_loss": 0.6068953275680542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.665841817855835, "epoch": 5.47, "learning_rate": 2.5176105945336713e-05, "loss": 1.1668, "step": 6469, "task_loss": 2.3111705780029297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6041450500488281, "epoch": 5.47, "learning_rate": 2.5171409786794402e-05, "loss": 0.9549, "step": 6470, "task_loss": 0.516470730304718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7661523818969727, "epoch": 5.47, "learning_rate": 2.5166713628252092e-05, "loss": 0.8183, "step": 6471, "task_loss": 0.8372418880462646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6279134154319763, "epoch": 5.47, "learning_rate": 2.5162017469709782e-05, "loss": 1.0835, "step": 6472, "task_loss": 0.529086709022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9014378786087036, "epoch": 5.47, "learning_rate": 2.5157321311167465e-05, "loss": 0.8336, "step": 6473, "task_loss": 1.3970685005187988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.052718997001648, "epoch": 5.47, "learning_rate": 2.5152625152625155e-05, "loss": 1.0268, "step": 6474, "task_loss": 1.7326576709747314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6660871505737305, "epoch": 5.47, "learning_rate": 2.514792899408284e-05, "loss": 0.9041, "step": 6475, "task_loss": 0.8919122219085693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6272763013839722, "epoch": 5.47, "learning_rate": 2.514323283554053e-05, "loss": 0.7594, "step": 6476, "task_loss": 1.5124480724334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0800124406814575, "epoch": 5.47, "learning_rate": 2.5138536676998214e-05, "loss": 0.9599, "step": 6477, "task_loss": 0.9328638911247253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9405714273452759, "epoch": 5.48, "learning_rate": 2.5133840518455903e-05, "loss": 0.9787, "step": 6478, "task_loss": 1.340885043144226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5677040815353394, "epoch": 5.48, "learning_rate": 2.5129144359913593e-05, "loss": 0.9856, "step": 6479, "task_loss": 0.3889024257659912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5646227598190308, "epoch": 5.48, "learning_rate": 2.5124448201371283e-05, "loss": 0.7293, "step": 6480, "task_loss": 0.6478696465492249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.092689871788025, "epoch": 5.48, "learning_rate": 2.5119752042828966e-05, "loss": 0.8746, "step": 6481, "task_loss": 0.6931218504905701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5176877975463867, "epoch": 5.48, "learning_rate": 2.5115055884286652e-05, "loss": 1.2679, "step": 6482, "task_loss": 1.0509361028671265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4208579063415527, "epoch": 5.48, "learning_rate": 2.5110359725744342e-05, "loss": 1.2491, "step": 6483, "task_loss": 1.0614898204803467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2696436643600464, "epoch": 5.48, "learning_rate": 2.510566356720203e-05, "loss": 0.939, "step": 6484, "task_loss": 1.2795811891555786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7350759506225586, "epoch": 5.48, "learning_rate": 2.510096740865972e-05, "loss": 1.0111, "step": 6485, "task_loss": 1.214144229888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0396965742111206, "epoch": 5.48, "learning_rate": 2.5096271250117404e-05, "loss": 0.9792, "step": 6486, "task_loss": 1.2021455764770508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8896199464797974, "epoch": 5.48, "learning_rate": 2.5091575091575094e-05, "loss": 1.1288, "step": 6487, "task_loss": 1.0595605373382568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9641153216362, "epoch": 5.48, "learning_rate": 2.508687893303278e-05, "loss": 0.8888, "step": 6488, "task_loss": 0.5049519538879395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4690970182418823, "epoch": 5.48, "learning_rate": 2.508218277449047e-05, "loss": 1.0716, "step": 6489, "task_loss": 0.9959275722503662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3778198957443237, "epoch": 5.49, "learning_rate": 2.5077486615948153e-05, "loss": 0.9537, "step": 6490, "task_loss": 1.5082241296768188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8789318799972534, "epoch": 5.49, "learning_rate": 2.5072790457405843e-05, "loss": 0.9748, "step": 6491, "task_loss": 2.3226466178894043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7886154055595398, "epoch": 5.49, "learning_rate": 2.5068094298863532e-05, "loss": 0.7551, "step": 6492, "task_loss": 0.2820587754249573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.13168203830719, "epoch": 5.49, "learning_rate": 2.506339814032122e-05, "loss": 0.8927, "step": 6493, "task_loss": 1.045479655265808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.799027681350708, "epoch": 5.49, "learning_rate": 2.5058701981778905e-05, "loss": 0.8473, "step": 6494, "task_loss": 0.5270301103591919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.641897439956665, "epoch": 5.49, "learning_rate": 2.505400582323659e-05, "loss": 1.0757, "step": 6495, "task_loss": 1.0075335502624512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5208733081817627, "epoch": 5.49, "learning_rate": 2.504930966469428e-05, "loss": 1.248, "step": 6496, "task_loss": 2.038220167160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.187896490097046, "epoch": 5.49, "learning_rate": 2.504461350615197e-05, "loss": 0.7181, "step": 6497, "task_loss": 1.190690040588379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.826850414276123, "epoch": 5.49, "learning_rate": 2.5039917347609654e-05, "loss": 0.9085, "step": 6498, "task_loss": 1.1259533166885376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0058324337005615, "epoch": 5.49, "learning_rate": 2.5035221189067343e-05, "loss": 0.7822, "step": 6499, "task_loss": 0.9627976417541504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2501070499420166, "epoch": 5.49, "learning_rate": 2.5030525030525033e-05, "loss": 1.0883, "step": 6500, "task_loss": 0.6611730456352234 }, { "epoch": 5.49, "eval_accuracy": 0.8758019801980198, "eval_loss": 0.5841071605682373, "eval_runtime": 224.2012, "eval_samples_per_second": 112.622, "eval_steps_per_second": 0.883, "step": 6500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6657013297080994, "epoch": 5.5, "learning_rate": 2.502582887198272e-05, "loss": 0.9657, "step": 6501, "task_loss": 0.7866197228431702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.874518632888794, "epoch": 5.5, "learning_rate": 2.502113271344041e-05, "loss": 0.91, "step": 6502, "task_loss": 1.0947920083999634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8173425793647766, "epoch": 5.5, "learning_rate": 2.5016436554898092e-05, "loss": 1.021, "step": 6503, "task_loss": 0.31172287464141846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.227017879486084, "epoch": 5.5, "learning_rate": 2.5011740396355782e-05, "loss": 1.1065, "step": 6504, "task_loss": 1.135462999343872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5396571159362793, "epoch": 5.5, "learning_rate": 2.500704423781347e-05, "loss": 1.0409, "step": 6505, "task_loss": 0.8420279622077942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4111759662628174, "epoch": 5.5, "learning_rate": 2.5002348079271158e-05, "loss": 1.0688, "step": 6506, "task_loss": 1.8412120342254639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5673649311065674, "epoch": 5.5, "learning_rate": 2.4997651920728844e-05, "loss": 0.8378, "step": 6507, "task_loss": 1.2487928867340088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7217739820480347, "epoch": 5.5, "learning_rate": 2.499295576218653e-05, "loss": 0.965, "step": 6508, "task_loss": 0.5211615562438965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3735766410827637, "epoch": 5.5, "learning_rate": 2.498825960364422e-05, "loss": 1.0545, "step": 6509, "task_loss": 1.114157795906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.750909686088562, "epoch": 5.5, "learning_rate": 2.4983563445101907e-05, "loss": 0.8933, "step": 6510, "task_loss": 0.98591148853302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.493111252784729, "epoch": 5.5, "learning_rate": 2.4978867286559597e-05, "loss": 1.1582, "step": 6511, "task_loss": 1.2597019672393799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0361049175262451, "epoch": 5.5, "learning_rate": 2.4974171128017283e-05, "loss": 1.1132, "step": 6512, "task_loss": 1.2732086181640625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3803339004516602, "epoch": 5.51, "learning_rate": 2.496947496947497e-05, "loss": 1.0522, "step": 6513, "task_loss": 1.4319822788238525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9364749193191528, "epoch": 5.51, "learning_rate": 2.4964778810932656e-05, "loss": 0.868, "step": 6514, "task_loss": 1.191192865371704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49073582887649536, "epoch": 5.51, "learning_rate": 2.4960082652390345e-05, "loss": 0.8603, "step": 6515, "task_loss": 1.461944580078125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5755711197853088, "epoch": 5.51, "learning_rate": 2.4955386493848035e-05, "loss": 0.7514, "step": 6516, "task_loss": 0.5636588931083679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8524682521820068, "epoch": 5.51, "learning_rate": 2.495069033530572e-05, "loss": 0.815, "step": 6517, "task_loss": 0.750036358833313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3503198623657227, "epoch": 5.51, "learning_rate": 2.494599417676341e-05, "loss": 0.9293, "step": 6518, "task_loss": 1.124351143836975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0745304822921753, "epoch": 5.51, "learning_rate": 2.4941298018221097e-05, "loss": 1.0122, "step": 6519, "task_loss": 1.065743327140808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.977286159992218, "epoch": 5.51, "learning_rate": 2.4936601859678784e-05, "loss": 0.9976, "step": 6520, "task_loss": 0.874297559261322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.218144178390503, "epoch": 5.51, "learning_rate": 2.493190570113647e-05, "loss": 1.1476, "step": 6521, "task_loss": 0.6949684619903564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.903723418712616, "epoch": 5.51, "learning_rate": 2.492720954259416e-05, "loss": 0.6133, "step": 6522, "task_loss": 1.1108521223068237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.943649172782898, "epoch": 5.51, "learning_rate": 2.4922513384051846e-05, "loss": 1.0508, "step": 6523, "task_loss": 1.1693435907363892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8692184686660767, "epoch": 5.51, "learning_rate": 2.4917817225509536e-05, "loss": 0.8165, "step": 6524, "task_loss": 0.45241594314575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0787488222122192, "epoch": 5.52, "learning_rate": 2.4913121066967222e-05, "loss": 0.8685, "step": 6525, "task_loss": 1.226273775100708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7014660239219666, "epoch": 5.52, "learning_rate": 2.490842490842491e-05, "loss": 1.1164, "step": 6526, "task_loss": 0.3839866518974304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8450026512145996, "epoch": 5.52, "learning_rate": 2.4903728749882595e-05, "loss": 0.7845, "step": 6527, "task_loss": 0.4678587019443512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.179279088973999, "epoch": 5.52, "learning_rate": 2.4899032591340285e-05, "loss": 1.2312, "step": 6528, "task_loss": 0.9219085574150085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8980056643486023, "epoch": 5.52, "learning_rate": 2.489433643279797e-05, "loss": 0.9366, "step": 6529, "task_loss": 0.848045289516449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7776978015899658, "epoch": 5.52, "learning_rate": 2.488964027425566e-05, "loss": 0.7591, "step": 6530, "task_loss": 1.5360541343688965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6884169578552246, "epoch": 5.52, "learning_rate": 2.488494411571335e-05, "loss": 0.9089, "step": 6531, "task_loss": 1.2555665969848633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39432644844055176, "epoch": 5.52, "learning_rate": 2.4880247957171037e-05, "loss": 0.6404, "step": 6532, "task_loss": 0.2633448541164398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1806906461715698, "epoch": 5.52, "learning_rate": 2.4875551798628723e-05, "loss": 1.1236, "step": 6533, "task_loss": 1.2173823118209839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7640842199325562, "epoch": 5.52, "learning_rate": 2.487085564008641e-05, "loss": 0.9601, "step": 6534, "task_loss": 0.6606466770172119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0792315006256104, "epoch": 5.52, "learning_rate": 2.48661594815441e-05, "loss": 0.9695, "step": 6535, "task_loss": 1.7287592887878418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5325403213500977, "epoch": 5.52, "learning_rate": 2.4861463323001785e-05, "loss": 1.2279, "step": 6536, "task_loss": 0.8901312351226807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5049084424972534, "epoch": 5.53, "learning_rate": 2.4856767164459475e-05, "loss": 1.2974, "step": 6537, "task_loss": 1.6228147745132446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6937673687934875, "epoch": 5.53, "learning_rate": 2.485207100591716e-05, "loss": 0.8726, "step": 6538, "task_loss": 0.42685315012931824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1488782167434692, "epoch": 5.53, "learning_rate": 2.4847374847374848e-05, "loss": 1.029, "step": 6539, "task_loss": 1.4054731130599976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0237820148468018, "epoch": 5.53, "learning_rate": 2.4842678688832534e-05, "loss": 1.0746, "step": 6540, "task_loss": 1.09727144241333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6842741966247559, "epoch": 5.53, "learning_rate": 2.4837982530290224e-05, "loss": 1.1833, "step": 6541, "task_loss": 1.2801907062530518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1498908996582031, "epoch": 5.53, "learning_rate": 2.483328637174791e-05, "loss": 0.7997, "step": 6542, "task_loss": 0.3982292413711548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5584738254547119, "epoch": 5.53, "learning_rate": 2.48285902132056e-05, "loss": 0.957, "step": 6543, "task_loss": 0.9155765175819397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0458426475524902, "epoch": 5.53, "learning_rate": 2.4823894054663286e-05, "loss": 0.9545, "step": 6544, "task_loss": 1.2197471857070923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8260934948921204, "epoch": 5.53, "learning_rate": 2.4819197896120973e-05, "loss": 0.7812, "step": 6545, "task_loss": 0.462715744972229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6519031524658203, "epoch": 5.53, "learning_rate": 2.4814501737578662e-05, "loss": 0.7365, "step": 6546, "task_loss": 0.9230536818504333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.73429274559021, "epoch": 5.53, "learning_rate": 2.480980557903635e-05, "loss": 1.0973, "step": 6547, "task_loss": 0.10532329976558685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7215414643287659, "epoch": 5.53, "learning_rate": 2.480510942049404e-05, "loss": 0.8524, "step": 6548, "task_loss": 0.1998419314622879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8578425645828247, "epoch": 5.54, "learning_rate": 2.4800413261951725e-05, "loss": 1.0312, "step": 6549, "task_loss": 1.13750422000885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3633031845092773, "epoch": 5.54, "learning_rate": 2.4795717103409415e-05, "loss": 1.0286, "step": 6550, "task_loss": 1.0998125076293945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7123867273330688, "epoch": 5.54, "learning_rate": 2.47910209448671e-05, "loss": 0.7142, "step": 6551, "task_loss": 0.6095547080039978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8114765882492065, "epoch": 5.54, "learning_rate": 2.4786324786324787e-05, "loss": 1.0524, "step": 6552, "task_loss": 1.328378677368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8147013187408447, "epoch": 5.54, "learning_rate": 2.4781628627782474e-05, "loss": 0.9243, "step": 6553, "task_loss": 0.8771491050720215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8816978335380554, "epoch": 5.54, "learning_rate": 2.4776932469240163e-05, "loss": 1.3, "step": 6554, "task_loss": 0.9699884653091431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1433846950531006, "epoch": 5.54, "learning_rate": 2.477223631069785e-05, "loss": 0.8648, "step": 6555, "task_loss": 0.9570818543434143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.501092255115509, "epoch": 5.54, "learning_rate": 2.476754015215554e-05, "loss": 0.9149, "step": 6556, "task_loss": 0.5359464287757874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6708316206932068, "epoch": 5.54, "learning_rate": 2.4762843993613226e-05, "loss": 0.6772, "step": 6557, "task_loss": 1.1539133787155151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1241490840911865, "epoch": 5.54, "learning_rate": 2.4758147835070912e-05, "loss": 1.0124, "step": 6558, "task_loss": 1.8013962507247925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7882681488990784, "epoch": 5.54, "learning_rate": 2.47534516765286e-05, "loss": 0.8972, "step": 6559, "task_loss": 0.4476902186870575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6528726816177368, "epoch": 5.54, "learning_rate": 2.4748755517986288e-05, "loss": 0.8052, "step": 6560, "task_loss": 0.7073601484298706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9442431330680847, "epoch": 5.55, "learning_rate": 2.4744059359443978e-05, "loss": 0.9344, "step": 6561, "task_loss": 0.7695355415344238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0196324586868286, "epoch": 5.55, "learning_rate": 2.4739363200901664e-05, "loss": 1.0259, "step": 6562, "task_loss": 0.8847076892852783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6041070222854614, "epoch": 5.55, "learning_rate": 2.4734667042359354e-05, "loss": 0.8162, "step": 6563, "task_loss": 0.612967848777771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45972511172294617, "epoch": 5.55, "learning_rate": 2.4729970883817037e-05, "loss": 1.0237, "step": 6564, "task_loss": 0.8429804444313049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8959006667137146, "epoch": 5.55, "learning_rate": 2.4725274725274727e-05, "loss": 0.8301, "step": 6565, "task_loss": 1.81551992893219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8636816740036011, "epoch": 5.55, "learning_rate": 2.4720578566732413e-05, "loss": 0.888, "step": 6566, "task_loss": 1.137856364250183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.410810947418213, "epoch": 5.55, "learning_rate": 2.4715882408190103e-05, "loss": 0.85, "step": 6567, "task_loss": 0.6335996985435486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8096228837966919, "epoch": 5.55, "learning_rate": 2.471118624964779e-05, "loss": 0.773, "step": 6568, "task_loss": 1.636767864227295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0487672090530396, "epoch": 5.55, "learning_rate": 2.470649009110548e-05, "loss": 1.1909, "step": 6569, "task_loss": 0.36562246084213257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1586394309997559, "epoch": 5.55, "learning_rate": 2.4701793932563165e-05, "loss": 0.6937, "step": 6570, "task_loss": 1.8086717128753662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4634110927581787, "epoch": 5.55, "learning_rate": 2.469709777402085e-05, "loss": 0.8869, "step": 6571, "task_loss": 1.4677180051803589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7731709480285645, "epoch": 5.56, "learning_rate": 2.4692401615478538e-05, "loss": 0.9423, "step": 6572, "task_loss": 0.9105175733566284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1926333904266357, "epoch": 5.56, "learning_rate": 2.4687705456936227e-05, "loss": 0.938, "step": 6573, "task_loss": 0.8334721922874451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4080524444580078, "epoch": 5.56, "learning_rate": 2.4683009298393914e-05, "loss": 0.9646, "step": 6574, "task_loss": 1.2608702182769775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.938406229019165, "epoch": 5.56, "learning_rate": 2.4678313139851604e-05, "loss": 0.848, "step": 6575, "task_loss": 0.8093544840812683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7783303260803223, "epoch": 5.56, "learning_rate": 2.467361698130929e-05, "loss": 1.092, "step": 6576, "task_loss": 0.43104448914527893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.422229290008545, "epoch": 5.56, "learning_rate": 2.4668920822766976e-05, "loss": 0.9266, "step": 6577, "task_loss": 1.3634697198867798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.23459792137146, "epoch": 5.56, "learning_rate": 2.4664224664224666e-05, "loss": 1.1652, "step": 6578, "task_loss": 2.0861220359802246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3613329529762268, "epoch": 5.56, "learning_rate": 2.4659528505682352e-05, "loss": 0.5388, "step": 6579, "task_loss": 0.20278002321720123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5986824035644531, "epoch": 5.56, "learning_rate": 2.4654832347140042e-05, "loss": 0.9207, "step": 6580, "task_loss": 1.6123974323272705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9255947470664978, "epoch": 5.56, "learning_rate": 2.465013618859773e-05, "loss": 0.7928, "step": 6581, "task_loss": 0.6345309019088745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1817421913146973, "epoch": 5.56, "learning_rate": 2.4645440030055418e-05, "loss": 0.9636, "step": 6582, "task_loss": 2.1792688369750977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3129291534423828, "epoch": 5.56, "learning_rate": 2.4640743871513104e-05, "loss": 1.0395, "step": 6583, "task_loss": 0.914962887763977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.809389591217041, "epoch": 5.57, "learning_rate": 2.463604771297079e-05, "loss": 0.8366, "step": 6584, "task_loss": 0.775503396987915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9585346579551697, "epoch": 5.57, "learning_rate": 2.4631351554428477e-05, "loss": 0.6971, "step": 6585, "task_loss": 0.9967755079269409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6984102725982666, "epoch": 5.57, "learning_rate": 2.4626655395886167e-05, "loss": 0.6881, "step": 6586, "task_loss": 0.8694093823432922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.761799156665802, "epoch": 5.57, "learning_rate": 2.4621959237343853e-05, "loss": 0.8402, "step": 6587, "task_loss": 0.7583928108215332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6070970892906189, "epoch": 5.57, "learning_rate": 2.4617263078801543e-05, "loss": 0.7386, "step": 6588, "task_loss": 0.29845988750457764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.821727454662323, "epoch": 5.57, "learning_rate": 2.461256692025923e-05, "loss": 0.7333, "step": 6589, "task_loss": 0.7692535519599915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8122782707214355, "epoch": 5.57, "learning_rate": 2.4607870761716916e-05, "loss": 0.8761, "step": 6590, "task_loss": 1.1019032001495361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8729677200317383, "epoch": 5.57, "learning_rate": 2.4603174603174602e-05, "loss": 1.0377, "step": 6591, "task_loss": 0.7925320863723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.76020348072052, "epoch": 5.57, "learning_rate": 2.459847844463229e-05, "loss": 0.9034, "step": 6592, "task_loss": 0.9114996790885925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4084465503692627, "epoch": 5.57, "learning_rate": 2.459378228608998e-05, "loss": 1.1793, "step": 6593, "task_loss": 0.9172645211219788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.044830322265625, "epoch": 5.57, "learning_rate": 2.4589086127547668e-05, "loss": 1.2393, "step": 6594, "task_loss": 1.2494149208068848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5938973426818848, "epoch": 5.57, "learning_rate": 2.4584389969005357e-05, "loss": 1.2428, "step": 6595, "task_loss": 1.1803454160690308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.01301908493042, "epoch": 5.58, "learning_rate": 2.457969381046304e-05, "loss": 0.9586, "step": 6596, "task_loss": 0.8023908138275146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8570832014083862, "epoch": 5.58, "learning_rate": 2.457499765192073e-05, "loss": 0.8742, "step": 6597, "task_loss": 0.18843993544578552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40542125701904297, "epoch": 5.58, "learning_rate": 2.4570301493378416e-05, "loss": 0.6925, "step": 6598, "task_loss": 0.31724029779434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3483798503875732, "epoch": 5.58, "learning_rate": 2.4565605334836106e-05, "loss": 1.0849, "step": 6599, "task_loss": 1.1843674182891846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8310719132423401, "epoch": 5.58, "learning_rate": 2.4560909176293793e-05, "loss": 0.9302, "step": 6600, "task_loss": 1.2632842063903809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8144137859344482, "epoch": 5.58, "learning_rate": 2.4556213017751482e-05, "loss": 0.9781, "step": 6601, "task_loss": 1.4774856567382812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7444736361503601, "epoch": 5.58, "learning_rate": 2.455151685920917e-05, "loss": 0.9254, "step": 6602, "task_loss": 0.9892721176147461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0853462219238281, "epoch": 5.58, "learning_rate": 2.4546820700666855e-05, "loss": 0.998, "step": 6603, "task_loss": 1.4046324491500854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3095498085021973, "epoch": 5.58, "learning_rate": 2.454212454212454e-05, "loss": 1.0312, "step": 6604, "task_loss": 0.6657377481460571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8480690717697144, "epoch": 5.58, "learning_rate": 2.453742838358223e-05, "loss": 0.6865, "step": 6605, "task_loss": 0.7751169204711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3601957857608795, "epoch": 5.58, "learning_rate": 2.4532732225039917e-05, "loss": 0.6279, "step": 6606, "task_loss": 0.31326693296432495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7744115591049194, "epoch": 5.58, "learning_rate": 2.4528036066497607e-05, "loss": 0.8901, "step": 6607, "task_loss": 0.9824672341346741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8586734533309937, "epoch": 5.59, "learning_rate": 2.4523339907955293e-05, "loss": 0.9957, "step": 6608, "task_loss": 0.9979089498519897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1018619537353516, "epoch": 5.59, "learning_rate": 2.451864374941298e-05, "loss": 0.9672, "step": 6609, "task_loss": 0.9818975925445557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9172312617301941, "epoch": 5.59, "learning_rate": 2.451394759087067e-05, "loss": 1.0975, "step": 6610, "task_loss": 1.1212161779403687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32395967841148376, "epoch": 5.59, "learning_rate": 2.4509251432328356e-05, "loss": 0.7301, "step": 6611, "task_loss": 0.7864381074905396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0467140674591064, "epoch": 5.59, "learning_rate": 2.4504555273786046e-05, "loss": 0.8773, "step": 6612, "task_loss": 1.8163686990737915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8643028140068054, "epoch": 5.59, "learning_rate": 2.4499859115243732e-05, "loss": 0.9889, "step": 6613, "task_loss": 0.4258991777896881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6661215424537659, "epoch": 5.59, "learning_rate": 2.449516295670142e-05, "loss": 0.747, "step": 6614, "task_loss": 0.3034084737300873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5935657620429993, "epoch": 5.59, "learning_rate": 2.4490466798159105e-05, "loss": 0.8089, "step": 6615, "task_loss": 0.7400009632110596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.539846420288086, "epoch": 5.59, "learning_rate": 2.4485770639616794e-05, "loss": 1.1408, "step": 6616, "task_loss": 2.063530921936035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.993257999420166, "epoch": 5.59, "learning_rate": 2.448107448107448e-05, "loss": 0.8129, "step": 6617, "task_loss": 1.235206961631775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2308789491653442, "epoch": 5.59, "learning_rate": 2.447637832253217e-05, "loss": 0.9868, "step": 6618, "task_loss": 1.4475361108779907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7888263463973999, "epoch": 5.59, "learning_rate": 2.4471682163989857e-05, "loss": 0.9076, "step": 6619, "task_loss": 1.4877570867538452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49930375814437866, "epoch": 5.6, "learning_rate": 2.4466986005447546e-05, "loss": 0.793, "step": 6620, "task_loss": 0.2576344609260559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5201314687728882, "epoch": 5.6, "learning_rate": 2.4462289846905233e-05, "loss": 0.8385, "step": 6621, "task_loss": 0.6217016577720642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6896842122077942, "epoch": 5.6, "learning_rate": 2.445759368836292e-05, "loss": 1.0549, "step": 6622, "task_loss": 1.2491698265075684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6932094693183899, "epoch": 5.6, "learning_rate": 2.445289752982061e-05, "loss": 0.776, "step": 6623, "task_loss": 0.9085137248039246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6741946339607239, "epoch": 5.6, "learning_rate": 2.4448201371278295e-05, "loss": 0.7129, "step": 6624, "task_loss": 0.7746827602386475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5608643293380737, "epoch": 5.6, "learning_rate": 2.4443505212735985e-05, "loss": 0.8083, "step": 6625, "task_loss": 0.14720670878887177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8052133321762085, "epoch": 5.6, "learning_rate": 2.443880905419367e-05, "loss": 0.9743, "step": 6626, "task_loss": 1.1505099534988403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9772657155990601, "epoch": 5.6, "learning_rate": 2.443411289565136e-05, "loss": 0.8165, "step": 6627, "task_loss": 1.1509162187576294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7420625686645508, "epoch": 5.6, "learning_rate": 2.4429416737109044e-05, "loss": 0.8621, "step": 6628, "task_loss": 0.5157089829444885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.561317503452301, "epoch": 5.6, "learning_rate": 2.4424720578566734e-05, "loss": 0.8392, "step": 6629, "task_loss": 0.46704426407814026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6949382424354553, "epoch": 5.6, "learning_rate": 2.442002442002442e-05, "loss": 0.8483, "step": 6630, "task_loss": 0.5465234518051147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6426810026168823, "epoch": 5.6, "learning_rate": 2.441532826148211e-05, "loss": 0.9195, "step": 6631, "task_loss": 1.2083864212036133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8195871710777283, "epoch": 5.61, "learning_rate": 2.4410632102939796e-05, "loss": 0.7885, "step": 6632, "task_loss": 0.5219837427139282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4519529938697815, "epoch": 5.61, "learning_rate": 2.4405935944397486e-05, "loss": 0.9386, "step": 6633, "task_loss": 0.8194407820701599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7534432411193848, "epoch": 5.61, "learning_rate": 2.4401239785855172e-05, "loss": 1.161, "step": 6634, "task_loss": 0.6730381846427917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5358664989471436, "epoch": 5.61, "learning_rate": 2.439654362731286e-05, "loss": 0.7089, "step": 6635, "task_loss": 0.36167144775390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 2.316181182861328, "epoch": 5.61, "learning_rate": 2.4391847468770545e-05, "loss": 1.2255, "step": 6636, "task_loss": 1.8150451183319092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5707367658615112, "epoch": 5.61, "learning_rate": 2.4387151310228235e-05, "loss": 0.7732, "step": 6637, "task_loss": 0.467639684677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.043367862701416, "epoch": 5.61, "learning_rate": 2.4382455151685924e-05, "loss": 1.0716, "step": 6638, "task_loss": 1.2554540634155273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4039924740791321, "epoch": 5.61, "learning_rate": 2.437775899314361e-05, "loss": 0.7863, "step": 6639, "task_loss": 0.03799688071012497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6845894455909729, "epoch": 5.61, "learning_rate": 2.4373062834601297e-05, "loss": 0.8672, "step": 6640, "task_loss": 0.9098131060600281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8093311190605164, "epoch": 5.61, "learning_rate": 2.4368366676058983e-05, "loss": 0.8112, "step": 6641, "task_loss": 0.7662151455879211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3830225467681885, "epoch": 5.61, "learning_rate": 2.4363670517516673e-05, "loss": 0.8076, "step": 6642, "task_loss": 0.11007338762283325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6091998219490051, "epoch": 5.61, "learning_rate": 2.435897435897436e-05, "loss": 1.014, "step": 6643, "task_loss": 0.83219313621521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.687790036201477, "epoch": 5.62, "learning_rate": 2.435427820043205e-05, "loss": 0.8248, "step": 6644, "task_loss": 0.4475155174732208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0012093782424927, "epoch": 5.62, "learning_rate": 2.4349582041889735e-05, "loss": 0.9388, "step": 6645, "task_loss": 0.7714759707450867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6693743467330933, "epoch": 5.62, "learning_rate": 2.4344885883347425e-05, "loss": 0.791, "step": 6646, "task_loss": 0.28137513995170593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5692740678787231, "epoch": 5.62, "learning_rate": 2.4340189724805108e-05, "loss": 0.7955, "step": 6647, "task_loss": 0.7480139136314392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9269230365753174, "epoch": 5.62, "learning_rate": 2.4335493566262798e-05, "loss": 0.9233, "step": 6648, "task_loss": 0.6314989328384399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2065144777297974, "epoch": 5.62, "learning_rate": 2.4330797407720484e-05, "loss": 0.7508, "step": 6649, "task_loss": 0.6370837688446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1174349784851074, "epoch": 5.62, "learning_rate": 2.4326101249178174e-05, "loss": 1.0569, "step": 6650, "task_loss": 1.1658120155334473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6105841398239136, "epoch": 5.62, "learning_rate": 2.432140509063586e-05, "loss": 0.8393, "step": 6651, "task_loss": 0.9697986245155334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.749163031578064, "epoch": 5.62, "learning_rate": 2.431670893209355e-05, "loss": 0.6414, "step": 6652, "task_loss": 0.9989696145057678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0554425716400146, "epoch": 5.62, "learning_rate": 2.4312012773551236e-05, "loss": 0.977, "step": 6653, "task_loss": 1.1265437602996826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.879107654094696, "epoch": 5.62, "learning_rate": 2.4307316615008923e-05, "loss": 0.6981, "step": 6654, "task_loss": 0.9518454670906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0895823240280151, "epoch": 5.63, "learning_rate": 2.4302620456466612e-05, "loss": 0.8336, "step": 6655, "task_loss": 1.3270900249481201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.910778820514679, "epoch": 5.63, "learning_rate": 2.42979242979243e-05, "loss": 0.745, "step": 6656, "task_loss": 1.2071506977081299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1267526149749756, "epoch": 5.63, "learning_rate": 2.429322813938199e-05, "loss": 1.0047, "step": 6657, "task_loss": 1.3380628824234009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.586982011795044, "epoch": 5.63, "learning_rate": 2.4288531980839675e-05, "loss": 1.2665, "step": 6658, "task_loss": 0.9552856683731079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8124516606330872, "epoch": 5.63, "learning_rate": 2.428383582229736e-05, "loss": 0.9785, "step": 6659, "task_loss": 0.48979273438453674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3653864860534668, "epoch": 5.63, "learning_rate": 2.4279139663755047e-05, "loss": 0.9708, "step": 6660, "task_loss": 0.9880106449127197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8146286010742188, "epoch": 5.63, "learning_rate": 2.4274443505212737e-05, "loss": 0.8991, "step": 6661, "task_loss": 1.3088200092315674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8613710999488831, "epoch": 5.63, "learning_rate": 2.4269747346670424e-05, "loss": 1.0112, "step": 6662, "task_loss": 1.3298759460449219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7236371040344238, "epoch": 5.63, "learning_rate": 2.4265051188128113e-05, "loss": 0.715, "step": 6663, "task_loss": 0.5980100631713867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6236189603805542, "epoch": 5.63, "learning_rate": 2.42603550295858e-05, "loss": 0.7836, "step": 6664, "task_loss": 1.365569829940796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9623491764068604, "epoch": 5.63, "learning_rate": 2.425565887104349e-05, "loss": 0.9176, "step": 6665, "task_loss": 0.9687021374702454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5881646275520325, "epoch": 5.63, "learning_rate": 2.4250962712501176e-05, "loss": 0.7194, "step": 6666, "task_loss": 0.5054254531860352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6852603554725647, "epoch": 5.64, "learning_rate": 2.4246266553958862e-05, "loss": 0.8719, "step": 6667, "task_loss": 1.188958764076233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9855988621711731, "epoch": 5.64, "learning_rate": 2.424157039541655e-05, "loss": 1.0686, "step": 6668, "task_loss": 0.34887078404426575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0391896963119507, "epoch": 5.64, "learning_rate": 2.4236874236874238e-05, "loss": 0.9027, "step": 6669, "task_loss": 0.49263638257980347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1376631259918213, "epoch": 5.64, "learning_rate": 2.4232178078331928e-05, "loss": 1.2113, "step": 6670, "task_loss": 0.8850348591804504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4165385365486145, "epoch": 5.64, "learning_rate": 2.4227481919789614e-05, "loss": 0.6567, "step": 6671, "task_loss": 0.3592468202114105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0144288539886475, "epoch": 5.64, "learning_rate": 2.42227857612473e-05, "loss": 0.894, "step": 6672, "task_loss": 0.6760537624359131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2503125667572021, "epoch": 5.64, "learning_rate": 2.4218089602704987e-05, "loss": 1.1558, "step": 6673, "task_loss": 1.7010104656219482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8617770075798035, "epoch": 5.64, "learning_rate": 2.4213393444162677e-05, "loss": 0.8074, "step": 6674, "task_loss": 0.9017760753631592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8227635025978088, "epoch": 5.64, "learning_rate": 2.4208697285620363e-05, "loss": 0.862, "step": 6675, "task_loss": 0.45298412442207336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41040104627609253, "epoch": 5.64, "learning_rate": 2.4204001127078053e-05, "loss": 0.7998, "step": 6676, "task_loss": 0.32433179020881653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1924923658370972, "epoch": 5.64, "learning_rate": 2.419930496853574e-05, "loss": 0.7552, "step": 6677, "task_loss": 0.9899694919586182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42795342206954956, "epoch": 5.64, "learning_rate": 2.419460880999343e-05, "loss": 0.7436, "step": 6678, "task_loss": 0.16966484487056732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6508484482765198, "epoch": 5.65, "learning_rate": 2.418991265145111e-05, "loss": 0.7745, "step": 6679, "task_loss": 0.570151686668396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9103101491928101, "epoch": 5.65, "learning_rate": 2.41852164929088e-05, "loss": 1.0704, "step": 6680, "task_loss": 0.935458242893219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6806714534759521, "epoch": 5.65, "learning_rate": 2.4180520334366488e-05, "loss": 0.6356, "step": 6681, "task_loss": 0.20941734313964844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3732355833053589, "epoch": 5.65, "learning_rate": 2.4175824175824177e-05, "loss": 0.5277, "step": 6682, "task_loss": 0.24642124772071838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6289505362510681, "epoch": 5.65, "learning_rate": 2.4171128017281864e-05, "loss": 0.7801, "step": 6683, "task_loss": 0.5463180541992188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3820422887802124, "epoch": 5.65, "learning_rate": 2.4166431858739553e-05, "loss": 1.0588, "step": 6684, "task_loss": 0.7473270893096924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1287500858306885, "epoch": 5.65, "learning_rate": 2.416173570019724e-05, "loss": 0.9459, "step": 6685, "task_loss": 1.1090108156204224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9112065434455872, "epoch": 5.65, "learning_rate": 2.4157039541654926e-05, "loss": 0.9122, "step": 6686, "task_loss": 1.4162384271621704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4340060353279114, "epoch": 5.65, "learning_rate": 2.4152343383112616e-05, "loss": 0.5113, "step": 6687, "task_loss": 0.17812488973140717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.525891661643982, "epoch": 5.65, "learning_rate": 2.4147647224570302e-05, "loss": 1.253, "step": 6688, "task_loss": 1.1492385864257812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5644545555114746, "epoch": 5.65, "learning_rate": 2.4142951066027992e-05, "loss": 0.8301, "step": 6689, "task_loss": 1.2723928689956665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6596430540084839, "epoch": 5.65, "learning_rate": 2.413825490748568e-05, "loss": 0.7365, "step": 6690, "task_loss": 1.4265496730804443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.263991117477417, "epoch": 5.66, "learning_rate": 2.4133558748943365e-05, "loss": 1.3269, "step": 6691, "task_loss": 1.1927897930145264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4359973669052124, "epoch": 5.66, "learning_rate": 2.412886259040105e-05, "loss": 1.0764, "step": 6692, "task_loss": 2.1725056171417236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8909645080566406, "epoch": 5.66, "learning_rate": 2.412416643185874e-05, "loss": 1.0169, "step": 6693, "task_loss": 0.8642861843109131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8395137786865234, "epoch": 5.66, "learning_rate": 2.4119470273316427e-05, "loss": 0.8691, "step": 6694, "task_loss": 0.3963286876678467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0311405658721924, "epoch": 5.66, "learning_rate": 2.4114774114774117e-05, "loss": 0.9685, "step": 6695, "task_loss": 0.8225850462913513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5447734594345093, "epoch": 5.66, "learning_rate": 2.4110077956231803e-05, "loss": 0.9011, "step": 6696, "task_loss": 1.3392260074615479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.802505612373352, "epoch": 5.66, "learning_rate": 2.4105381797689493e-05, "loss": 0.855, "step": 6697, "task_loss": 0.7908006310462952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5784937143325806, "epoch": 5.66, "learning_rate": 2.4100685639147176e-05, "loss": 0.7935, "step": 6698, "task_loss": 0.11412408202886581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7033073902130127, "epoch": 5.66, "learning_rate": 2.4095989480604866e-05, "loss": 0.7459, "step": 6699, "task_loss": 1.0092612504959106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9805001020431519, "epoch": 5.66, "learning_rate": 2.4091293322062555e-05, "loss": 0.8578, "step": 6700, "task_loss": 1.5310640335083008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8748195171356201, "epoch": 5.66, "learning_rate": 2.408659716352024e-05, "loss": 0.7431, "step": 6701, "task_loss": 0.6357121467590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7046779990196228, "epoch": 5.66, "learning_rate": 2.408190100497793e-05, "loss": 0.7003, "step": 6702, "task_loss": 0.5363360047340393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8532818555831909, "epoch": 5.67, "learning_rate": 2.4077204846435618e-05, "loss": 0.7038, "step": 6703, "task_loss": 0.9850608110427856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.796766996383667, "epoch": 5.67, "learning_rate": 2.4072508687893304e-05, "loss": 0.9378, "step": 6704, "task_loss": 1.1761103868484497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1326903104782104, "epoch": 5.67, "learning_rate": 2.406781252935099e-05, "loss": 0.9964, "step": 6705, "task_loss": 2.0679876804351807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9914113283157349, "epoch": 5.67, "learning_rate": 2.406311637080868e-05, "loss": 0.6376, "step": 6706, "task_loss": 1.0147066116333008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8448833227157593, "epoch": 5.67, "learning_rate": 2.4058420212266366e-05, "loss": 0.6156, "step": 6707, "task_loss": 1.4642422199249268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.20888352394104, "epoch": 5.67, "learning_rate": 2.4053724053724056e-05, "loss": 0.9943, "step": 6708, "task_loss": 0.8595589399337769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8504546284675598, "epoch": 5.67, "learning_rate": 2.4049027895181742e-05, "loss": 0.8435, "step": 6709, "task_loss": 0.6555991768836975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7975234389305115, "epoch": 5.67, "learning_rate": 2.404433173663943e-05, "loss": 0.8942, "step": 6710, "task_loss": 0.6494213938713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2918760776519775, "epoch": 5.67, "learning_rate": 2.4039635578097115e-05, "loss": 1.1355, "step": 6711, "task_loss": 1.321217656135559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5624276995658875, "epoch": 5.67, "learning_rate": 2.4034939419554805e-05, "loss": 0.9129, "step": 6712, "task_loss": 0.2604634165763855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0573899745941162, "epoch": 5.67, "learning_rate": 2.403024326101249e-05, "loss": 0.8844, "step": 6713, "task_loss": 0.7724015712738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0694206953048706, "epoch": 5.67, "learning_rate": 2.402554710247018e-05, "loss": 0.9168, "step": 6714, "task_loss": 1.3042945861816406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4901265501976013, "epoch": 5.68, "learning_rate": 2.402085094392787e-05, "loss": 0.8981, "step": 6715, "task_loss": 0.8523624539375305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7569620609283447, "epoch": 5.68, "learning_rate": 2.4016154785385557e-05, "loss": 0.7899, "step": 6716, "task_loss": 0.24348358809947968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7553989887237549, "epoch": 5.68, "learning_rate": 2.4011458626843243e-05, "loss": 1.0582, "step": 6717, "task_loss": 0.5923621654510498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1189004182815552, "epoch": 5.68, "learning_rate": 2.400676246830093e-05, "loss": 0.8693, "step": 6718, "task_loss": 2.53605318069458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.646579623222351, "epoch": 5.68, "learning_rate": 2.400206630975862e-05, "loss": 1.2091, "step": 6719, "task_loss": 0.9406073093414307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0304995775222778, "epoch": 5.68, "learning_rate": 2.3997370151216306e-05, "loss": 0.7912, "step": 6720, "task_loss": 1.0132784843444824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3174275159835815, "epoch": 5.68, "learning_rate": 2.3992673992673995e-05, "loss": 0.9472, "step": 6721, "task_loss": 0.9508304595947266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.72683185338974, "epoch": 5.68, "learning_rate": 2.3987977834131682e-05, "loss": 0.9025, "step": 6722, "task_loss": 0.5376559495925903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6041665077209473, "epoch": 5.68, "learning_rate": 2.3983281675589368e-05, "loss": 1.0372, "step": 6723, "task_loss": 1.9549996852874756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0780448913574219, "epoch": 5.68, "learning_rate": 2.3978585517047055e-05, "loss": 1.0185, "step": 6724, "task_loss": 1.1124032735824585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8352065086364746, "epoch": 5.68, "learning_rate": 2.3973889358504744e-05, "loss": 1.0932, "step": 6725, "task_loss": 0.8958531022071838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6865662336349487, "epoch": 5.69, "learning_rate": 2.396919319996243e-05, "loss": 0.8455, "step": 6726, "task_loss": 0.8816431164741516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1492726802825928, "epoch": 5.69, "learning_rate": 2.396449704142012e-05, "loss": 1.3024, "step": 6727, "task_loss": 1.3239974975585938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0976980924606323, "epoch": 5.69, "learning_rate": 2.3959800882877807e-05, "loss": 0.9285, "step": 6728, "task_loss": 2.160352945327759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43430736660957336, "epoch": 5.69, "learning_rate": 2.3955104724335496e-05, "loss": 0.9334, "step": 6729, "task_loss": 0.5409883260726929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8112225532531738, "epoch": 5.69, "learning_rate": 2.3950408565793183e-05, "loss": 0.6808, "step": 6730, "task_loss": 1.3946338891983032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7746355533599854, "epoch": 5.69, "learning_rate": 2.394571240725087e-05, "loss": 0.7881, "step": 6731, "task_loss": 1.8856667280197144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7276774644851685, "epoch": 5.69, "learning_rate": 2.394101624870856e-05, "loss": 0.9099, "step": 6732, "task_loss": 0.9453284740447998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5128403902053833, "epoch": 5.69, "learning_rate": 2.3936320090166245e-05, "loss": 0.6967, "step": 6733, "task_loss": 0.3819772005081177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5772033929824829, "epoch": 5.69, "learning_rate": 2.3931623931623935e-05, "loss": 0.7999, "step": 6734, "task_loss": 1.0237631797790527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8991644382476807, "epoch": 5.69, "learning_rate": 2.392692777308162e-05, "loss": 0.9944, "step": 6735, "task_loss": 0.6283984184265137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5267059803009033, "epoch": 5.69, "learning_rate": 2.3922231614539308e-05, "loss": 0.6924, "step": 6736, "task_loss": 0.6664329767227173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6005940437316895, "epoch": 5.69, "learning_rate": 2.3917535455996994e-05, "loss": 0.674, "step": 6737, "task_loss": 0.6118977665901184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.950215756893158, "epoch": 5.7, "learning_rate": 2.3912839297454684e-05, "loss": 0.9105, "step": 6738, "task_loss": 0.9880369901657104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7498821020126343, "epoch": 5.7, "learning_rate": 2.390814313891237e-05, "loss": 0.8105, "step": 6739, "task_loss": 0.25456881523132324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7769209146499634, "epoch": 5.7, "learning_rate": 2.390344698037006e-05, "loss": 0.7932, "step": 6740, "task_loss": 0.5025526881217957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5184406638145447, "epoch": 5.7, "learning_rate": 2.3898750821827746e-05, "loss": 0.6267, "step": 6741, "task_loss": 1.1019361019134521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9276410341262817, "epoch": 5.7, "learning_rate": 2.3894054663285432e-05, "loss": 1.1008, "step": 6742, "task_loss": 1.0844733715057373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1132755279541016, "epoch": 5.7, "learning_rate": 2.388935850474312e-05, "loss": 0.9916, "step": 6743, "task_loss": 1.3964111804962158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0007911920547485, "epoch": 5.7, "learning_rate": 2.388466234620081e-05, "loss": 0.8491, "step": 6744, "task_loss": 1.0391061305999756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1819854974746704, "epoch": 5.7, "learning_rate": 2.3879966187658495e-05, "loss": 1.0246, "step": 6745, "task_loss": 1.2066038846969604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.538358211517334, "epoch": 5.7, "learning_rate": 2.3875270029116184e-05, "loss": 0.8268, "step": 6746, "task_loss": 1.1701951026916504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7412750720977783, "epoch": 5.7, "learning_rate": 2.3870573870573874e-05, "loss": 0.9877, "step": 6747, "task_loss": 0.2012520432472229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4635162353515625, "epoch": 5.7, "learning_rate": 2.386587771203156e-05, "loss": 0.7606, "step": 6748, "task_loss": 0.6078482270240784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7772266864776611, "epoch": 5.7, "learning_rate": 2.3861181553489247e-05, "loss": 0.767, "step": 6749, "task_loss": 1.1827738285064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6688496470451355, "epoch": 5.71, "learning_rate": 2.3856485394946933e-05, "loss": 0.5653, "step": 6750, "task_loss": 0.2681787610054016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5339338779449463, "epoch": 5.71, "learning_rate": 2.3851789236404623e-05, "loss": 0.7668, "step": 6751, "task_loss": 0.8933846354484558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1146878004074097, "epoch": 5.71, "learning_rate": 2.384709307786231e-05, "loss": 0.8252, "step": 6752, "task_loss": 1.0834637880325317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7285690307617188, "epoch": 5.71, "learning_rate": 2.384239691932e-05, "loss": 0.794, "step": 6753, "task_loss": 0.8879833221435547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1959084272384644, "epoch": 5.71, "learning_rate": 2.3837700760777685e-05, "loss": 1.0448, "step": 6754, "task_loss": 2.1426124572753906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6592192649841309, "epoch": 5.71, "learning_rate": 2.3833004602235372e-05, "loss": 0.9806, "step": 6755, "task_loss": 1.3773459196090698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5082616209983826, "epoch": 5.71, "learning_rate": 2.3828308443693058e-05, "loss": 0.6312, "step": 6756, "task_loss": 0.45518985390663147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1696619987487793, "epoch": 5.71, "learning_rate": 2.3823612285150748e-05, "loss": 0.9166, "step": 6757, "task_loss": 1.404418706893921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6670087575912476, "epoch": 5.71, "learning_rate": 2.3818916126608434e-05, "loss": 0.7711, "step": 6758, "task_loss": 0.7160468101501465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6644537448883057, "epoch": 5.71, "learning_rate": 2.3814219968066124e-05, "loss": 0.6783, "step": 6759, "task_loss": 0.9527550339698792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7729899287223816, "epoch": 5.71, "learning_rate": 2.380952380952381e-05, "loss": 0.712, "step": 6760, "task_loss": 0.9332108497619629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6533719301223755, "epoch": 5.71, "learning_rate": 2.38048276509815e-05, "loss": 0.9119, "step": 6761, "task_loss": 0.5385382771492004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6401979923248291, "epoch": 5.72, "learning_rate": 2.3800131492439186e-05, "loss": 0.7172, "step": 6762, "task_loss": 0.6598042249679565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7858419418334961, "epoch": 5.72, "learning_rate": 2.3795435333896873e-05, "loss": 0.6833, "step": 6763, "task_loss": 0.41403624415397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31794190406799316, "epoch": 5.72, "learning_rate": 2.3790739175354562e-05, "loss": 0.7587, "step": 6764, "task_loss": 1.0106934309005737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.921808660030365, "epoch": 5.72, "learning_rate": 2.378604301681225e-05, "loss": 1.1638, "step": 6765, "task_loss": 0.6214282512664795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9194267392158508, "epoch": 5.72, "learning_rate": 2.378134685826994e-05, "loss": 1.0979, "step": 6766, "task_loss": 1.3342158794403076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9982989430427551, "epoch": 5.72, "learning_rate": 2.3776650699727625e-05, "loss": 1.0169, "step": 6767, "task_loss": 1.1610156297683716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.811760425567627, "epoch": 5.72, "learning_rate": 2.377195454118531e-05, "loss": 0.8742, "step": 6768, "task_loss": 0.829494833946228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5429058074951172, "epoch": 5.72, "learning_rate": 2.3767258382642997e-05, "loss": 1.045, "step": 6769, "task_loss": 1.6773239374160767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5907358527183533, "epoch": 5.72, "learning_rate": 2.3762562224100687e-05, "loss": 0.7696, "step": 6770, "task_loss": 0.4714205861091614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6665833592414856, "epoch": 5.72, "learning_rate": 2.3757866065558373e-05, "loss": 1.0836, "step": 6771, "task_loss": 0.7656214833259583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7866262793540955, "epoch": 5.72, "learning_rate": 2.3753169907016063e-05, "loss": 0.7931, "step": 6772, "task_loss": 0.9509865045547485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5562241673469543, "epoch": 5.72, "learning_rate": 2.374847374847375e-05, "loss": 1.0659, "step": 6773, "task_loss": 1.858856439590454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6117932796478271, "epoch": 5.73, "learning_rate": 2.3743777589931436e-05, "loss": 0.7705, "step": 6774, "task_loss": 0.6235654950141907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9147313833236694, "epoch": 5.73, "learning_rate": 2.3739081431389122e-05, "loss": 0.7981, "step": 6775, "task_loss": 0.9420831203460693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7163195013999939, "epoch": 5.73, "learning_rate": 2.3734385272846812e-05, "loss": 0.6782, "step": 6776, "task_loss": 0.7349075078964233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6782357096672058, "epoch": 5.73, "learning_rate": 2.37296891143045e-05, "loss": 0.5179, "step": 6777, "task_loss": 0.1663813441991806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6031697988510132, "epoch": 5.73, "learning_rate": 2.3724992955762188e-05, "loss": 0.7394, "step": 6778, "task_loss": 0.4277181029319763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7201300263404846, "epoch": 5.73, "learning_rate": 2.3720296797219878e-05, "loss": 0.9564, "step": 6779, "task_loss": 1.1576415300369263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.707104504108429, "epoch": 5.73, "learning_rate": 2.3715600638677564e-05, "loss": 0.6953, "step": 6780, "task_loss": 0.6471700668334961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6574745774269104, "epoch": 5.73, "learning_rate": 2.371090448013525e-05, "loss": 0.8762, "step": 6781, "task_loss": 0.7707016468048096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7267427444458008, "epoch": 5.73, "learning_rate": 2.3706208321592937e-05, "loss": 1.0042, "step": 6782, "task_loss": 0.6043946146965027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.623580813407898, "epoch": 5.73, "learning_rate": 2.3701512163050626e-05, "loss": 0.7628, "step": 6783, "task_loss": 0.365390419960022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.776829183101654, "epoch": 5.73, "learning_rate": 2.3696816004508313e-05, "loss": 0.784, "step": 6784, "task_loss": 0.5500479340553284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8256927132606506, "epoch": 5.73, "learning_rate": 2.3692119845966003e-05, "loss": 0.6999, "step": 6785, "task_loss": 0.713874876499176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7083070278167725, "epoch": 5.74, "learning_rate": 2.368742368742369e-05, "loss": 0.6785, "step": 6786, "task_loss": 1.2058771848678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4797181785106659, "epoch": 5.74, "learning_rate": 2.3682727528881375e-05, "loss": 0.8411, "step": 6787, "task_loss": 0.6223994493484497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8830306529998779, "epoch": 5.74, "learning_rate": 2.367803137033906e-05, "loss": 1.0415, "step": 6788, "task_loss": 0.5363203287124634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5664002895355225, "epoch": 5.74, "learning_rate": 2.367333521179675e-05, "loss": 0.9076, "step": 6789, "task_loss": 0.4508720636367798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7933081388473511, "epoch": 5.74, "learning_rate": 2.3668639053254438e-05, "loss": 0.6717, "step": 6790, "task_loss": 0.861663281917572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7799925804138184, "epoch": 5.74, "learning_rate": 2.3663942894712127e-05, "loss": 0.8724, "step": 6791, "task_loss": 1.1644635200500488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0297553539276123, "epoch": 5.74, "learning_rate": 2.3659246736169817e-05, "loss": 1.1022, "step": 6792, "task_loss": 0.715162992477417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9881187081336975, "epoch": 5.74, "learning_rate": 2.36545505776275e-05, "loss": 0.7297, "step": 6793, "task_loss": 1.5119588375091553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.967006266117096, "epoch": 5.74, "learning_rate": 2.364985441908519e-05, "loss": 0.8879, "step": 6794, "task_loss": 0.4291415214538574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8909153938293457, "epoch": 5.74, "learning_rate": 2.3645158260542876e-05, "loss": 0.9404, "step": 6795, "task_loss": 1.2705793380737305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9877798557281494, "epoch": 5.74, "learning_rate": 2.3640462102000566e-05, "loss": 1.0145, "step": 6796, "task_loss": 0.7189785242080688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9437059164047241, "epoch": 5.75, "learning_rate": 2.3635765943458252e-05, "loss": 0.8979, "step": 6797, "task_loss": 0.7640867233276367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.751459538936615, "epoch": 5.75, "learning_rate": 2.3631069784915942e-05, "loss": 0.768, "step": 6798, "task_loss": 0.292072594165802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.624165415763855, "epoch": 5.75, "learning_rate": 2.3626373626373628e-05, "loss": 1.1744, "step": 6799, "task_loss": 0.9591875076293945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5254432559013367, "epoch": 5.75, "learning_rate": 2.3621677467831315e-05, "loss": 0.6548, "step": 6800, "task_loss": 0.5657554268836975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.220070242881775, "epoch": 5.75, "learning_rate": 2.3616981309289e-05, "loss": 0.9971, "step": 6801, "task_loss": 1.030498743057251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0088276863098145, "epoch": 5.75, "learning_rate": 2.361228515074669e-05, "loss": 0.8412, "step": 6802, "task_loss": 1.1409316062927246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9284648299217224, "epoch": 5.75, "learning_rate": 2.3607588992204377e-05, "loss": 1.0028, "step": 6803, "task_loss": 0.6684693694114685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7246572375297546, "epoch": 5.75, "learning_rate": 2.3602892833662067e-05, "loss": 0.7674, "step": 6804, "task_loss": 0.585976779460907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6347731351852417, "epoch": 5.75, "learning_rate": 2.3598196675119753e-05, "loss": 0.8853, "step": 6805, "task_loss": 0.645514965057373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8917719721794128, "epoch": 5.75, "learning_rate": 2.359350051657744e-05, "loss": 0.8821, "step": 6806, "task_loss": 1.1655433177947998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6586019992828369, "epoch": 5.75, "learning_rate": 2.358880435803513e-05, "loss": 0.7864, "step": 6807, "task_loss": 0.5877671837806702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2357449531555176, "epoch": 5.75, "learning_rate": 2.3584108199492815e-05, "loss": 1.0226, "step": 6808, "task_loss": 0.6219791769981384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8493270874023438, "epoch": 5.76, "learning_rate": 2.3579412040950505e-05, "loss": 0.7705, "step": 6809, "task_loss": 1.848942756652832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9094938635826111, "epoch": 5.76, "learning_rate": 2.357471588240819e-05, "loss": 0.8384, "step": 6810, "task_loss": 0.21877077221870422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7793458700180054, "epoch": 5.76, "learning_rate": 2.357001972386588e-05, "loss": 0.7947, "step": 6811, "task_loss": 0.7530227899551392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47906556725502014, "epoch": 5.76, "learning_rate": 2.3565323565323568e-05, "loss": 0.6918, "step": 6812, "task_loss": 0.9564513564109802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7396922707557678, "epoch": 5.76, "learning_rate": 2.3560627406781254e-05, "loss": 0.8536, "step": 6813, "task_loss": 0.7701924443244934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8556114435195923, "epoch": 5.76, "learning_rate": 2.355593124823894e-05, "loss": 0.7944, "step": 6814, "task_loss": 0.4380066394805908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8847439289093018, "epoch": 5.76, "learning_rate": 2.355123508969663e-05, "loss": 0.8041, "step": 6815, "task_loss": 0.7861074805259705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4308665990829468, "epoch": 5.76, "learning_rate": 2.3546538931154316e-05, "loss": 0.7189, "step": 6816, "task_loss": 0.7584428787231445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0965330600738525, "epoch": 5.76, "learning_rate": 2.3541842772612006e-05, "loss": 0.9881, "step": 6817, "task_loss": 2.0120816230773926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6412811279296875, "epoch": 5.76, "learning_rate": 2.3537146614069692e-05, "loss": 0.7531, "step": 6818, "task_loss": 1.0736066102981567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7721224427223206, "epoch": 5.76, "learning_rate": 2.353245045552738e-05, "loss": 0.876, "step": 6819, "task_loss": 1.252653956413269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1107646226882935, "epoch": 5.76, "learning_rate": 2.3527754296985065e-05, "loss": 0.886, "step": 6820, "task_loss": 1.2342886924743652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4007880687713623, "epoch": 5.77, "learning_rate": 2.3523058138442755e-05, "loss": 0.699, "step": 6821, "task_loss": 0.30185335874557495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5165782570838928, "epoch": 5.77, "learning_rate": 2.351836197990044e-05, "loss": 0.8384, "step": 6822, "task_loss": 0.3213224709033966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0041896104812622, "epoch": 5.77, "learning_rate": 2.351366582135813e-05, "loss": 1.0442, "step": 6823, "task_loss": 1.0806617736816406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7615369558334351, "epoch": 5.77, "learning_rate": 2.350896966281582e-05, "loss": 0.7516, "step": 6824, "task_loss": 0.2946266829967499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8365009427070618, "epoch": 5.77, "learning_rate": 2.3504273504273504e-05, "loss": 0.9386, "step": 6825, "task_loss": 1.0491260290145874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1123210191726685, "epoch": 5.77, "learning_rate": 2.3499577345731193e-05, "loss": 0.9587, "step": 6826, "task_loss": 1.2643579244613647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8848245143890381, "epoch": 5.77, "learning_rate": 2.349488118718888e-05, "loss": 0.8763, "step": 6827, "task_loss": 0.9632453322410583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4013727903366089, "epoch": 5.77, "learning_rate": 2.349018502864657e-05, "loss": 1.016, "step": 6828, "task_loss": 0.8766739368438721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.702363133430481, "epoch": 5.77, "learning_rate": 2.3485488870104256e-05, "loss": 0.969, "step": 6829, "task_loss": 0.8650076985359192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3478219509124756, "epoch": 5.77, "learning_rate": 2.3480792711561945e-05, "loss": 0.9902, "step": 6830, "task_loss": 0.5623361468315125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6801795959472656, "epoch": 5.77, "learning_rate": 2.3476096553019632e-05, "loss": 0.8089, "step": 6831, "task_loss": 0.3656201958656311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0713603496551514, "epoch": 5.77, "learning_rate": 2.3471400394477318e-05, "loss": 0.75, "step": 6832, "task_loss": 1.4277788400650024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46765953302383423, "epoch": 5.78, "learning_rate": 2.3466704235935004e-05, "loss": 0.9383, "step": 6833, "task_loss": 1.117329716682434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0151019096374512, "epoch": 5.78, "learning_rate": 2.3462008077392694e-05, "loss": 0.9692, "step": 6834, "task_loss": 1.146822452545166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8616476058959961, "epoch": 5.78, "learning_rate": 2.345731191885038e-05, "loss": 0.8763, "step": 6835, "task_loss": 1.432313323020935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5261510610580444, "epoch": 5.78, "learning_rate": 2.345261576030807e-05, "loss": 1.0397, "step": 6836, "task_loss": 0.7682374715805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8626473546028137, "epoch": 5.78, "learning_rate": 2.3447919601765757e-05, "loss": 0.6752, "step": 6837, "task_loss": 0.8887458443641663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6712104082107544, "epoch": 5.78, "learning_rate": 2.3443223443223443e-05, "loss": 0.7387, "step": 6838, "task_loss": 0.3157694637775421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7716078162193298, "epoch": 5.78, "learning_rate": 2.3438527284681133e-05, "loss": 0.7344, "step": 6839, "task_loss": 1.7729289531707764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.392484188079834, "epoch": 5.78, "learning_rate": 2.343383112613882e-05, "loss": 1.1932, "step": 6840, "task_loss": 1.2306017875671387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8327536582946777, "epoch": 5.78, "learning_rate": 2.342913496759651e-05, "loss": 0.7635, "step": 6841, "task_loss": 0.8172356486320496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4226163625717163, "epoch": 5.78, "learning_rate": 2.3424438809054195e-05, "loss": 0.6688, "step": 6842, "task_loss": 1.0246753692626953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.238809585571289, "epoch": 5.78, "learning_rate": 2.3419742650511885e-05, "loss": 1.1311, "step": 6843, "task_loss": 0.928219735622406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8885261416435242, "epoch": 5.78, "learning_rate": 2.341504649196957e-05, "loss": 0.8643, "step": 6844, "task_loss": 1.058905005455017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3926240801811218, "epoch": 5.79, "learning_rate": 2.3410350333427257e-05, "loss": 0.6061, "step": 6845, "task_loss": 0.30899667739868164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.780731737613678, "epoch": 5.79, "learning_rate": 2.3405654174884944e-05, "loss": 0.9031, "step": 6846, "task_loss": 0.2933547794818878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6000924706459045, "epoch": 5.79, "learning_rate": 2.3400958016342634e-05, "loss": 0.8733, "step": 6847, "task_loss": 1.0487849712371826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7927223443984985, "epoch": 5.79, "learning_rate": 2.339626185780032e-05, "loss": 0.8149, "step": 6848, "task_loss": 1.0508795976638794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8007731437683105, "epoch": 5.79, "learning_rate": 2.339156569925801e-05, "loss": 0.7054, "step": 6849, "task_loss": 1.2897098064422607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9222687482833862, "epoch": 5.79, "learning_rate": 2.3386869540715696e-05, "loss": 0.7854, "step": 6850, "task_loss": 0.7149814367294312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8194894790649414, "epoch": 5.79, "learning_rate": 2.3382173382173382e-05, "loss": 0.9157, "step": 6851, "task_loss": 1.2025891542434692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48078885674476624, "epoch": 5.79, "learning_rate": 2.337747722363107e-05, "loss": 0.821, "step": 6852, "task_loss": 0.4648517370223999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8525031805038452, "epoch": 5.79, "learning_rate": 2.337278106508876e-05, "loss": 0.874, "step": 6853, "task_loss": 1.6228238344192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7417672872543335, "epoch": 5.79, "learning_rate": 2.3368084906546448e-05, "loss": 0.8964, "step": 6854, "task_loss": 0.8348963260650635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6634053587913513, "epoch": 5.79, "learning_rate": 2.3363388748004134e-05, "loss": 0.7086, "step": 6855, "task_loss": 0.7965723872184753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2074451446533203, "epoch": 5.79, "learning_rate": 2.3358692589461824e-05, "loss": 0.8537, "step": 6856, "task_loss": 0.842383086681366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.488761842250824, "epoch": 5.8, "learning_rate": 2.3353996430919507e-05, "loss": 0.6997, "step": 6857, "task_loss": 0.9014686346054077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9364867806434631, "epoch": 5.8, "learning_rate": 2.3349300272377197e-05, "loss": 0.8721, "step": 6858, "task_loss": 1.3353360891342163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6031842231750488, "epoch": 5.8, "learning_rate": 2.3344604113834883e-05, "loss": 0.6625, "step": 6859, "task_loss": 0.8409504890441895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.097886562347412, "epoch": 5.8, "learning_rate": 2.3339907955292573e-05, "loss": 0.8062, "step": 6860, "task_loss": 2.1127097606658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9631848335266113, "epoch": 5.8, "learning_rate": 2.333521179675026e-05, "loss": 0.7126, "step": 6861, "task_loss": 0.678386926651001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5549660325050354, "epoch": 5.8, "learning_rate": 2.333051563820795e-05, "loss": 0.7845, "step": 6862, "task_loss": 1.241860270500183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9397750496864319, "epoch": 5.8, "learning_rate": 2.3325819479665635e-05, "loss": 0.8079, "step": 6863, "task_loss": 1.0670709609985352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.206024169921875, "epoch": 5.8, "learning_rate": 2.332112332112332e-05, "loss": 0.8427, "step": 6864, "task_loss": 1.2476564645767212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45083916187286377, "epoch": 5.8, "learning_rate": 2.3316427162581008e-05, "loss": 0.6782, "step": 6865, "task_loss": 0.7965219020843506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6085028052330017, "epoch": 5.8, "learning_rate": 2.3311731004038698e-05, "loss": 0.7047, "step": 6866, "task_loss": 0.5366711020469666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.695631742477417, "epoch": 5.8, "learning_rate": 2.3307034845496384e-05, "loss": 0.9995, "step": 6867, "task_loss": 0.4640756845474243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5855199098587036, "epoch": 5.81, "learning_rate": 2.3302338686954074e-05, "loss": 0.7894, "step": 6868, "task_loss": 0.615359902381897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7982842922210693, "epoch": 5.81, "learning_rate": 2.329764252841176e-05, "loss": 0.8827, "step": 6869, "task_loss": 1.1131811141967773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.100006103515625, "epoch": 5.81, "learning_rate": 2.3292946369869446e-05, "loss": 0.7533, "step": 6870, "task_loss": 0.7323358058929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.645458459854126, "epoch": 5.81, "learning_rate": 2.3288250211327136e-05, "loss": 0.7548, "step": 6871, "task_loss": 1.2488489151000977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9161185026168823, "epoch": 5.81, "learning_rate": 2.3283554052784823e-05, "loss": 0.6904, "step": 6872, "task_loss": 1.0704158544540405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0343974828720093, "epoch": 5.81, "learning_rate": 2.3278857894242512e-05, "loss": 0.837, "step": 6873, "task_loss": 0.44171762466430664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1298015117645264, "epoch": 5.81, "learning_rate": 2.32741617357002e-05, "loss": 1.0008, "step": 6874, "task_loss": 0.22064220905303955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4739595651626587, "epoch": 5.81, "learning_rate": 2.326946557715789e-05, "loss": 0.8813, "step": 6875, "task_loss": 0.4515654742717743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2203991413116455, "epoch": 5.81, "learning_rate": 2.326476941861557e-05, "loss": 0.7585, "step": 6876, "task_loss": 0.6659778356552124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7335408926010132, "epoch": 5.81, "learning_rate": 2.326007326007326e-05, "loss": 0.742, "step": 6877, "task_loss": 0.9036747813224792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5397753715515137, "epoch": 5.81, "learning_rate": 2.3255377101530947e-05, "loss": 0.8115, "step": 6878, "task_loss": 0.554964005947113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1102254390716553, "epoch": 5.81, "learning_rate": 2.3250680942988637e-05, "loss": 1.5171, "step": 6879, "task_loss": 1.0457664728164673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7757078409194946, "epoch": 5.82, "learning_rate": 2.3245984784446323e-05, "loss": 1.0079, "step": 6880, "task_loss": 0.44565996527671814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9207782745361328, "epoch": 5.82, "learning_rate": 2.3241288625904013e-05, "loss": 0.7857, "step": 6881, "task_loss": 1.2600562572479248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0353004932403564, "epoch": 5.82, "learning_rate": 2.32365924673617e-05, "loss": 0.9306, "step": 6882, "task_loss": 1.4275633096694946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5660714507102966, "epoch": 5.82, "learning_rate": 2.3231896308819386e-05, "loss": 0.6493, "step": 6883, "task_loss": 0.4003981053829193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6590543985366821, "epoch": 5.82, "learning_rate": 2.3227200150277076e-05, "loss": 0.5386, "step": 6884, "task_loss": 1.0416232347488403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9789127707481384, "epoch": 5.82, "learning_rate": 2.3222503991734762e-05, "loss": 0.8714, "step": 6885, "task_loss": 0.3599562644958496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7775999307632446, "epoch": 5.82, "learning_rate": 2.321780783319245e-05, "loss": 0.8847, "step": 6886, "task_loss": 0.43331417441368103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1021268367767334, "epoch": 5.82, "learning_rate": 2.3213111674650138e-05, "loss": 0.8589, "step": 6887, "task_loss": 1.0699381828308105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5809930562973022, "epoch": 5.82, "learning_rate": 2.3208415516107824e-05, "loss": 0.6631, "step": 6888, "task_loss": 0.4839063286781311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7073726654052734, "epoch": 5.82, "learning_rate": 2.320371935756551e-05, "loss": 0.8241, "step": 6889, "task_loss": 0.3301776051521301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.095102071762085, "epoch": 5.82, "learning_rate": 2.31990231990232e-05, "loss": 0.7447, "step": 6890, "task_loss": 1.1450133323669434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6422443985939026, "epoch": 5.82, "learning_rate": 2.3194327040480887e-05, "loss": 0.8323, "step": 6891, "task_loss": 0.27851662039756775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6124929189682007, "epoch": 5.83, "learning_rate": 2.3189630881938576e-05, "loss": 0.7289, "step": 6892, "task_loss": 0.136086106300354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0941150188446045, "epoch": 5.83, "learning_rate": 2.3184934723396263e-05, "loss": 0.9418, "step": 6893, "task_loss": 0.9242238402366638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5658001899719238, "epoch": 5.83, "learning_rate": 2.3180238564853952e-05, "loss": 0.9445, "step": 6894, "task_loss": 1.219571828842163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5848703384399414, "epoch": 5.83, "learning_rate": 2.317554240631164e-05, "loss": 0.716, "step": 6895, "task_loss": 0.4117252826690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.651708722114563, "epoch": 5.83, "learning_rate": 2.3170846247769325e-05, "loss": 1.1344, "step": 6896, "task_loss": 1.7041630744934082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7270351648330688, "epoch": 5.83, "learning_rate": 2.316615008922701e-05, "loss": 0.8257, "step": 6897, "task_loss": 1.5910816192626953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0408061742782593, "epoch": 5.83, "learning_rate": 2.31614539306847e-05, "loss": 1.0158, "step": 6898, "task_loss": 0.37689855694770813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4008333086967468, "epoch": 5.83, "learning_rate": 2.3156757772142388e-05, "loss": 0.6601, "step": 6899, "task_loss": 0.41418376564979553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3549655675888062, "epoch": 5.83, "learning_rate": 2.3152061613600077e-05, "loss": 0.8028, "step": 6900, "task_loss": 1.4995496273040771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7510675191879272, "epoch": 5.83, "learning_rate": 2.3147365455057764e-05, "loss": 0.872, "step": 6901, "task_loss": 1.2363568544387817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9821670055389404, "epoch": 5.83, "learning_rate": 2.314266929651545e-05, "loss": 0.9928, "step": 6902, "task_loss": 0.504131019115448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5798989534378052, "epoch": 5.83, "learning_rate": 2.313797313797314e-05, "loss": 0.7972, "step": 6903, "task_loss": 0.7161124348640442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47254636883735657, "epoch": 5.84, "learning_rate": 2.3133276979430826e-05, "loss": 0.7002, "step": 6904, "task_loss": 1.3864881992340088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4650743007659912, "epoch": 5.84, "learning_rate": 2.3128580820888516e-05, "loss": 0.6945, "step": 6905, "task_loss": 0.9804227948188782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9478390216827393, "epoch": 5.84, "learning_rate": 2.3123884662346202e-05, "loss": 0.7509, "step": 6906, "task_loss": 1.159545660018921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8576946258544922, "epoch": 5.84, "learning_rate": 2.3119188503803892e-05, "loss": 0.8091, "step": 6907, "task_loss": 0.5560330152511597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8196834325790405, "epoch": 5.84, "learning_rate": 2.3114492345261575e-05, "loss": 0.7594, "step": 6908, "task_loss": 1.7768142223358154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6918762922286987, "epoch": 5.84, "learning_rate": 2.3109796186719265e-05, "loss": 0.7552, "step": 6909, "task_loss": 0.9049802422523499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6369016766548157, "epoch": 5.84, "learning_rate": 2.310510002817695e-05, "loss": 1.107, "step": 6910, "task_loss": 0.8494186997413635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1935138702392578, "epoch": 5.84, "learning_rate": 2.310040386963464e-05, "loss": 0.8563, "step": 6911, "task_loss": 0.9398133754730225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8485157489776611, "epoch": 5.84, "learning_rate": 2.3095707711092327e-05, "loss": 0.8311, "step": 6912, "task_loss": 0.710290789604187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8282005786895752, "epoch": 5.84, "learning_rate": 2.3091011552550017e-05, "loss": 0.796, "step": 6913, "task_loss": 1.003414511680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5739920735359192, "epoch": 5.84, "learning_rate": 2.3086315394007703e-05, "loss": 0.7348, "step": 6914, "task_loss": 1.0499862432479858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5831115245819092, "epoch": 5.84, "learning_rate": 2.308161923546539e-05, "loss": 0.5273, "step": 6915, "task_loss": 0.5750800967216492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2562167644500732, "epoch": 5.85, "learning_rate": 2.307692307692308e-05, "loss": 0.9445, "step": 6916, "task_loss": 0.6336440443992615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5796395540237427, "epoch": 5.85, "learning_rate": 2.3072226918380765e-05, "loss": 0.6565, "step": 6917, "task_loss": 0.53474360704422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1181224584579468, "epoch": 5.85, "learning_rate": 2.3067530759838455e-05, "loss": 0.9105, "step": 6918, "task_loss": 1.7752617597579956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3740057945251465, "epoch": 5.85, "learning_rate": 2.306283460129614e-05, "loss": 0.8335, "step": 6919, "task_loss": 1.0588057041168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2056665420532227, "epoch": 5.85, "learning_rate": 2.3058138442753828e-05, "loss": 0.8513, "step": 6920, "task_loss": 1.7895638942718506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0523415803909302, "epoch": 5.85, "learning_rate": 2.3053442284211514e-05, "loss": 0.8131, "step": 6921, "task_loss": 0.7537809610366821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45795899629592896, "epoch": 5.85, "learning_rate": 2.3048746125669204e-05, "loss": 0.7593, "step": 6922, "task_loss": 0.39967092871665955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5021393299102783, "epoch": 5.85, "learning_rate": 2.304404996712689e-05, "loss": 0.6537, "step": 6923, "task_loss": 0.37599408626556396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5812463760375977, "epoch": 5.85, "learning_rate": 2.303935380858458e-05, "loss": 0.8262, "step": 6924, "task_loss": 0.5114654898643494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.584484875202179, "epoch": 5.85, "learning_rate": 2.3034657650042266e-05, "loss": 0.6678, "step": 6925, "task_loss": 0.48684853315353394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7140631675720215, "epoch": 5.85, "learning_rate": 2.3029961491499956e-05, "loss": 0.8065, "step": 6926, "task_loss": 0.4762769043445587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8898043632507324, "epoch": 5.85, "learning_rate": 2.302526533295764e-05, "loss": 0.868, "step": 6927, "task_loss": 0.9139252305030823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8778864145278931, "epoch": 5.86, "learning_rate": 2.302056917441533e-05, "loss": 0.7373, "step": 6928, "task_loss": 0.6323369145393372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8031850457191467, "epoch": 5.86, "learning_rate": 2.3015873015873015e-05, "loss": 1.0155, "step": 6929, "task_loss": 0.9969375729560852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6344562768936157, "epoch": 5.86, "learning_rate": 2.3011176857330705e-05, "loss": 0.976, "step": 6930, "task_loss": 1.4018889665603638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.779607355594635, "epoch": 5.86, "learning_rate": 2.3006480698788394e-05, "loss": 0.8697, "step": 6931, "task_loss": 1.8786869049072266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.31294846534729, "epoch": 5.86, "learning_rate": 2.300178454024608e-05, "loss": 1.0162, "step": 6932, "task_loss": 1.6797150373458862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2992999851703644, "epoch": 5.86, "learning_rate": 2.2997088381703767e-05, "loss": 0.5996, "step": 6933, "task_loss": 0.7629806399345398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6183074712753296, "epoch": 5.86, "learning_rate": 2.2992392223161454e-05, "loss": 0.7875, "step": 6934, "task_loss": 0.7597124576568604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3574421405792236, "epoch": 5.86, "learning_rate": 2.2987696064619143e-05, "loss": 0.8808, "step": 6935, "task_loss": 2.2512619495391846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3099138736724854, "epoch": 5.86, "learning_rate": 2.298299990607683e-05, "loss": 1.0834, "step": 6936, "task_loss": 0.8322623372077942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7932381629943848, "epoch": 5.86, "learning_rate": 2.297830374753452e-05, "loss": 0.7833, "step": 6937, "task_loss": 0.9608466029167175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.537906289100647, "epoch": 5.86, "learning_rate": 2.2973607588992206e-05, "loss": 0.7749, "step": 6938, "task_loss": 0.1984475553035736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9016161561012268, "epoch": 5.87, "learning_rate": 2.2968911430449895e-05, "loss": 0.8486, "step": 6939, "task_loss": 0.6750759482383728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7129932641983032, "epoch": 5.87, "learning_rate": 2.296421527190758e-05, "loss": 0.8494, "step": 6940, "task_loss": 0.904747724533081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9135289192199707, "epoch": 5.87, "learning_rate": 2.2959519113365268e-05, "loss": 0.9275, "step": 6941, "task_loss": 0.6140274405479431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5805168747901917, "epoch": 5.87, "learning_rate": 2.2954822954822954e-05, "loss": 0.6697, "step": 6942, "task_loss": 0.4809413552284241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6260769367218018, "epoch": 5.87, "learning_rate": 2.2950126796280644e-05, "loss": 0.9715, "step": 6943, "task_loss": 1.5445563793182373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8908027410507202, "epoch": 5.87, "learning_rate": 2.294543063773833e-05, "loss": 0.8489, "step": 6944, "task_loss": 0.3793049454689026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7050027847290039, "epoch": 5.87, "learning_rate": 2.294073447919602e-05, "loss": 0.812, "step": 6945, "task_loss": 0.9843765497207642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6942324638366699, "epoch": 5.87, "learning_rate": 2.2936038320653707e-05, "loss": 0.7949, "step": 6946, "task_loss": 0.12731574475765228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5897660255432129, "epoch": 5.87, "learning_rate": 2.2931342162111393e-05, "loss": 0.8606, "step": 6947, "task_loss": 0.540510892868042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.104628086090088, "epoch": 5.87, "learning_rate": 2.2926646003569083e-05, "loss": 0.8584, "step": 6948, "task_loss": 1.8825358152389526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5080540180206299, "epoch": 5.87, "learning_rate": 2.292194984502677e-05, "loss": 0.5718, "step": 6949, "task_loss": 0.24305301904678345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6762527823448181, "epoch": 5.87, "learning_rate": 2.291725368648446e-05, "loss": 0.9894, "step": 6950, "task_loss": 0.7431737780570984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8093144297599792, "epoch": 5.88, "learning_rate": 2.2912557527942145e-05, "loss": 1.0251, "step": 6951, "task_loss": 1.1379114389419556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8532044887542725, "epoch": 5.88, "learning_rate": 2.290786136939983e-05, "loss": 0.8374, "step": 6952, "task_loss": 0.704490065574646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.061250925064087, "epoch": 5.88, "learning_rate": 2.2903165210857518e-05, "loss": 0.7037, "step": 6953, "task_loss": 1.7897157669067383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8602464199066162, "epoch": 5.88, "learning_rate": 2.2898469052315207e-05, "loss": 0.8111, "step": 6954, "task_loss": 0.27614036202430725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7431944608688354, "epoch": 5.88, "learning_rate": 2.2893772893772894e-05, "loss": 0.7349, "step": 6955, "task_loss": 0.675443172454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8999468684196472, "epoch": 5.88, "learning_rate": 2.2889076735230583e-05, "loss": 1.0207, "step": 6956, "task_loss": 1.0080044269561768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2429406642913818, "epoch": 5.88, "learning_rate": 2.288438057668827e-05, "loss": 0.9625, "step": 6957, "task_loss": 1.0731648206710815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49153000116348267, "epoch": 5.88, "learning_rate": 2.287968441814596e-05, "loss": 0.8254, "step": 6958, "task_loss": 0.46819254755973816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9313098788261414, "epoch": 5.88, "learning_rate": 2.2874988259603642e-05, "loss": 0.9289, "step": 6959, "task_loss": 0.7599198222160339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6940560340881348, "epoch": 5.88, "learning_rate": 2.2870292101061332e-05, "loss": 0.7467, "step": 6960, "task_loss": 0.5447453856468201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.746895432472229, "epoch": 5.88, "learning_rate": 2.286559594251902e-05, "loss": 0.7088, "step": 6961, "task_loss": 0.9086584448814392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7705367207527161, "epoch": 5.88, "learning_rate": 2.2860899783976708e-05, "loss": 0.7672, "step": 6962, "task_loss": 0.3587607145309448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6468161940574646, "epoch": 5.89, "learning_rate": 2.2856203625434398e-05, "loss": 0.7425, "step": 6963, "task_loss": 0.7610446810722351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4951876997947693, "epoch": 5.89, "learning_rate": 2.2851507466892084e-05, "loss": 0.5139, "step": 6964, "task_loss": 0.2983340919017792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5553719997406006, "epoch": 5.89, "learning_rate": 2.284681130834977e-05, "loss": 0.9587, "step": 6965, "task_loss": 0.5462021231651306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5496252775192261, "epoch": 5.89, "learning_rate": 2.2842115149807457e-05, "loss": 0.7714, "step": 6966, "task_loss": 0.6715641617774963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1837775707244873, "epoch": 5.89, "learning_rate": 2.2837418991265147e-05, "loss": 1.0179, "step": 6967, "task_loss": 1.1799731254577637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34019410610198975, "epoch": 5.89, "learning_rate": 2.2832722832722833e-05, "loss": 0.7091, "step": 6968, "task_loss": 0.786483108997345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8145132064819336, "epoch": 5.89, "learning_rate": 2.2828026674180523e-05, "loss": 0.8462, "step": 6969, "task_loss": 1.3186742067337036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3847671151161194, "epoch": 5.89, "learning_rate": 2.282333051563821e-05, "loss": 0.7295, "step": 6970, "task_loss": 0.19420328736305237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.160125494003296, "epoch": 5.89, "learning_rate": 2.2818634357095896e-05, "loss": 0.8782, "step": 6971, "task_loss": 0.7240820527076721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6951016187667847, "epoch": 5.89, "learning_rate": 2.2813938198553582e-05, "loss": 0.7029, "step": 6972, "task_loss": 0.527197003364563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8841211795806885, "epoch": 5.89, "learning_rate": 2.280924204001127e-05, "loss": 0.7261, "step": 6973, "task_loss": 1.3426038026809692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9797552227973938, "epoch": 5.89, "learning_rate": 2.2804545881468958e-05, "loss": 0.8638, "step": 6974, "task_loss": 1.3700141906738281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9698609709739685, "epoch": 5.9, "learning_rate": 2.2799849722926648e-05, "loss": 0.9006, "step": 6975, "task_loss": 1.3447016477584839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6851927042007446, "epoch": 5.9, "learning_rate": 2.2795153564384334e-05, "loss": 0.9069, "step": 6976, "task_loss": 0.6201470494270325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8143402934074402, "epoch": 5.9, "learning_rate": 2.2790457405842024e-05, "loss": 0.9042, "step": 6977, "task_loss": 0.7006320357322693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8343498110771179, "epoch": 5.9, "learning_rate": 2.278576124729971e-05, "loss": 0.9097, "step": 6978, "task_loss": 1.211145281791687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1546915769577026, "epoch": 5.9, "learning_rate": 2.2781065088757396e-05, "loss": 0.8276, "step": 6979, "task_loss": 0.6206389665603638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7788922786712646, "epoch": 5.9, "learning_rate": 2.2776368930215086e-05, "loss": 1.0003, "step": 6980, "task_loss": 1.155000925064087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9455762505531311, "epoch": 5.9, "learning_rate": 2.2771672771672772e-05, "loss": 0.884, "step": 6981, "task_loss": 2.117172956466675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5260904431343079, "epoch": 5.9, "learning_rate": 2.2766976613130462e-05, "loss": 0.6771, "step": 6982, "task_loss": 0.5692490339279175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6814124584197998, "epoch": 5.9, "learning_rate": 2.276228045458815e-05, "loss": 1.1265, "step": 6983, "task_loss": 1.0798364877700806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6738179922103882, "epoch": 5.9, "learning_rate": 2.2757584296045835e-05, "loss": 0.7647, "step": 6984, "task_loss": 1.0989285707473755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8315807580947876, "epoch": 5.9, "learning_rate": 2.275288813750352e-05, "loss": 0.6436, "step": 6985, "task_loss": 0.6120631098747253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6761645078659058, "epoch": 5.9, "learning_rate": 2.274819197896121e-05, "loss": 0.6581, "step": 6986, "task_loss": 1.4359335899353027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6545689105987549, "epoch": 5.91, "learning_rate": 2.2743495820418897e-05, "loss": 0.7992, "step": 6987, "task_loss": 0.90062415599823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8116587400436401, "epoch": 5.91, "learning_rate": 2.2738799661876587e-05, "loss": 0.7128, "step": 6988, "task_loss": 1.2466007471084595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8340736031532288, "epoch": 5.91, "learning_rate": 2.2734103503334273e-05, "loss": 0.7654, "step": 6989, "task_loss": 0.2302483767271042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.139648675918579, "epoch": 5.91, "learning_rate": 2.2729407344791963e-05, "loss": 0.9168, "step": 6990, "task_loss": 2.247284173965454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.420558214187622, "epoch": 5.91, "learning_rate": 2.2724711186249646e-05, "loss": 0.8404, "step": 6991, "task_loss": 1.8198771476745605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7651773691177368, "epoch": 5.91, "learning_rate": 2.2720015027707336e-05, "loss": 1.0069, "step": 6992, "task_loss": 1.8693655729293823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9467302560806274, "epoch": 5.91, "learning_rate": 2.2715318869165025e-05, "loss": 0.8665, "step": 6993, "task_loss": 0.9998923540115356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8840410113334656, "epoch": 5.91, "learning_rate": 2.2710622710622712e-05, "loss": 0.912, "step": 6994, "task_loss": 1.0522584915161133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9404094219207764, "epoch": 5.91, "learning_rate": 2.27059265520804e-05, "loss": 0.8903, "step": 6995, "task_loss": 0.9163635969161987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1066088676452637, "epoch": 5.91, "learning_rate": 2.2701230393538088e-05, "loss": 0.9866, "step": 6996, "task_loss": 0.5677676200866699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.899699866771698, "epoch": 5.91, "learning_rate": 2.2696534234995774e-05, "loss": 0.7972, "step": 6997, "task_loss": 0.5036907196044922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1382992267608643, "epoch": 5.91, "learning_rate": 2.269183807645346e-05, "loss": 0.9107, "step": 6998, "task_loss": 1.9735748767852783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9778037071228027, "epoch": 5.92, "learning_rate": 2.268714191791115e-05, "loss": 0.8647, "step": 6999, "task_loss": 1.1764501333236694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47807416319847107, "epoch": 5.92, "learning_rate": 2.2682445759368837e-05, "loss": 0.7024, "step": 7000, "task_loss": 0.4996645450592041 }, { "epoch": 5.92, "eval_accuracy": 0.8854257425742574, "eval_loss": 0.5055385828018188, "eval_runtime": 223.963, "eval_samples_per_second": 112.742, "eval_steps_per_second": 0.884, "step": 7000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9040396809577942, "epoch": 5.92, "learning_rate": 2.2677749600826526e-05, "loss": 0.7892, "step": 7001, "task_loss": 1.3055602312088013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2988966703414917, "epoch": 5.92, "learning_rate": 2.2673053442284213e-05, "loss": 0.821, "step": 7002, "task_loss": 1.2801527976989746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7847487926483154, "epoch": 5.92, "learning_rate": 2.26683572837419e-05, "loss": 0.8616, "step": 7003, "task_loss": 0.5704183578491211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0363689661026, "epoch": 5.92, "learning_rate": 2.2663661125199585e-05, "loss": 0.8978, "step": 7004, "task_loss": 1.661550760269165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7587233781814575, "epoch": 5.92, "learning_rate": 2.2658964966657275e-05, "loss": 0.9157, "step": 7005, "task_loss": 0.28078019618988037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5409667491912842, "epoch": 5.92, "learning_rate": 2.265426880811496e-05, "loss": 0.8121, "step": 7006, "task_loss": 0.1549520641565323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.881912350654602, "epoch": 5.92, "learning_rate": 2.264957264957265e-05, "loss": 0.7941, "step": 7007, "task_loss": 0.9966364502906799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6913919448852539, "epoch": 5.92, "learning_rate": 2.264487649103034e-05, "loss": 0.7468, "step": 7008, "task_loss": 1.0083352327346802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1021840572357178, "epoch": 5.92, "learning_rate": 2.2640180332488027e-05, "loss": 0.8892, "step": 7009, "task_loss": 1.2007498741149902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8597589731216431, "epoch": 5.93, "learning_rate": 2.2635484173945714e-05, "loss": 0.8302, "step": 7010, "task_loss": 1.1725040674209595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8254997134208679, "epoch": 5.93, "learning_rate": 2.26307880154034e-05, "loss": 0.6778, "step": 7011, "task_loss": 1.287405252456665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9647136926651001, "epoch": 5.93, "learning_rate": 2.262609185686109e-05, "loss": 0.8139, "step": 7012, "task_loss": 0.5942767262458801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1154611110687256, "epoch": 5.93, "learning_rate": 2.2621395698318776e-05, "loss": 0.7528, "step": 7013, "task_loss": 0.9798694849014282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8224474191665649, "epoch": 5.93, "learning_rate": 2.2616699539776466e-05, "loss": 0.8902, "step": 7014, "task_loss": 0.5536147952079773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8791772127151489, "epoch": 5.93, "learning_rate": 2.2612003381234152e-05, "loss": 1.0204, "step": 7015, "task_loss": 1.2398756742477417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6155428886413574, "epoch": 5.93, "learning_rate": 2.260730722269184e-05, "loss": 0.7949, "step": 7016, "task_loss": 1.1316947937011719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7708951234817505, "epoch": 5.93, "learning_rate": 2.2602611064149525e-05, "loss": 0.7559, "step": 7017, "task_loss": 0.6584386229515076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0584901571273804, "epoch": 5.93, "learning_rate": 2.2597914905607214e-05, "loss": 0.8925, "step": 7018, "task_loss": 0.5301038026809692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39632049202919006, "epoch": 5.93, "learning_rate": 2.25932187470649e-05, "loss": 0.6611, "step": 7019, "task_loss": 0.16441869735717773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9264040589332581, "epoch": 5.93, "learning_rate": 2.258852258852259e-05, "loss": 0.7298, "step": 7020, "task_loss": 0.695143461227417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0766865015029907, "epoch": 5.93, "learning_rate": 2.2583826429980277e-05, "loss": 0.8871, "step": 7021, "task_loss": 1.334552526473999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7723246812820435, "epoch": 5.94, "learning_rate": 2.2579130271437963e-05, "loss": 0.7316, "step": 7022, "task_loss": 0.8982069492340088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.494918555021286, "epoch": 5.94, "learning_rate": 2.2574434112895653e-05, "loss": 0.7128, "step": 7023, "task_loss": 1.1601157188415527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3970741033554077, "epoch": 5.94, "learning_rate": 2.256973795435334e-05, "loss": 0.5014, "step": 7024, "task_loss": 0.6527501940727234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8461787104606628, "epoch": 5.94, "learning_rate": 2.256504179581103e-05, "loss": 0.806, "step": 7025, "task_loss": 0.804229199886322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2757049798965454, "epoch": 5.94, "learning_rate": 2.2560345637268715e-05, "loss": 0.8991, "step": 7026, "task_loss": 0.614885151386261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7020004391670227, "epoch": 5.94, "learning_rate": 2.2555649478726405e-05, "loss": 0.7932, "step": 7027, "task_loss": 1.1141961812973022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5329338312149048, "epoch": 5.94, "learning_rate": 2.255095332018409e-05, "loss": 0.8054, "step": 7028, "task_loss": 0.7868536114692688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7166117429733276, "epoch": 5.94, "learning_rate": 2.2546257161641778e-05, "loss": 0.6679, "step": 7029, "task_loss": 1.5020278692245483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4864065647125244, "epoch": 5.94, "learning_rate": 2.2541561003099464e-05, "loss": 0.5696, "step": 7030, "task_loss": 0.2934349477291107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5985938906669617, "epoch": 5.94, "learning_rate": 2.2536864844557154e-05, "loss": 0.7326, "step": 7031, "task_loss": 1.5550155639648438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9186473488807678, "epoch": 5.94, "learning_rate": 2.253216868601484e-05, "loss": 0.9266, "step": 7032, "task_loss": 1.363355278968811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7623573541641235, "epoch": 5.94, "learning_rate": 2.252747252747253e-05, "loss": 0.7315, "step": 7033, "task_loss": 1.289315938949585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0929038524627686, "epoch": 5.95, "learning_rate": 2.2522776368930216e-05, "loss": 0.6917, "step": 7034, "task_loss": 0.785473108291626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5148024559020996, "epoch": 5.95, "learning_rate": 2.2518080210387903e-05, "loss": 0.6783, "step": 7035, "task_loss": 0.5722660422325134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.556762158870697, "epoch": 5.95, "learning_rate": 2.251338405184559e-05, "loss": 0.5528, "step": 7036, "task_loss": 0.7310892939567566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0450648069381714, "epoch": 5.95, "learning_rate": 2.250868789330328e-05, "loss": 1.0059, "step": 7037, "task_loss": 1.366563081741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7678801417350769, "epoch": 5.95, "learning_rate": 2.2503991734760965e-05, "loss": 0.7143, "step": 7038, "task_loss": 1.257102370262146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8145753145217896, "epoch": 5.95, "learning_rate": 2.2499295576218655e-05, "loss": 0.7815, "step": 7039, "task_loss": 2.7202773094177246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4608686864376068, "epoch": 5.95, "learning_rate": 2.2494599417676344e-05, "loss": 0.7233, "step": 7040, "task_loss": 0.19801190495491028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7397960424423218, "epoch": 5.95, "learning_rate": 2.248990325913403e-05, "loss": 0.7308, "step": 7041, "task_loss": 0.3678992688655853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.047513723373413, "epoch": 5.95, "learning_rate": 2.2485207100591717e-05, "loss": 0.9692, "step": 7042, "task_loss": 0.5027016401290894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6259472370147705, "epoch": 5.95, "learning_rate": 2.2480510942049403e-05, "loss": 0.8576, "step": 7043, "task_loss": 1.2858057022094727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7971817851066589, "epoch": 5.95, "learning_rate": 2.2475814783507093e-05, "loss": 0.856, "step": 7044, "task_loss": 1.005333662033081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4435473084449768, "epoch": 5.95, "learning_rate": 2.247111862496478e-05, "loss": 0.7333, "step": 7045, "task_loss": 0.9740713238716125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7415435314178467, "epoch": 5.96, "learning_rate": 2.246642246642247e-05, "loss": 0.6712, "step": 7046, "task_loss": 1.130391001701355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7249873876571655, "epoch": 5.96, "learning_rate": 2.2461726307880156e-05, "loss": 0.7536, "step": 7047, "task_loss": 0.44802868366241455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3990975022315979, "epoch": 5.96, "learning_rate": 2.2457030149337842e-05, "loss": 0.6366, "step": 7048, "task_loss": 0.9249972105026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.650456428527832, "epoch": 5.96, "learning_rate": 2.2452333990795528e-05, "loss": 0.7558, "step": 7049, "task_loss": 0.5162448287010193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1791160106658936, "epoch": 5.96, "learning_rate": 2.2447637832253218e-05, "loss": 0.9284, "step": 7050, "task_loss": 1.917451024055481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8004198670387268, "epoch": 5.96, "learning_rate": 2.2442941673710904e-05, "loss": 0.7469, "step": 7051, "task_loss": 0.4407010078430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5067516565322876, "epoch": 5.96, "learning_rate": 2.2438245515168594e-05, "loss": 0.8335, "step": 7052, "task_loss": 0.3539488911628723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.91002357006073, "epoch": 5.96, "learning_rate": 2.243354935662628e-05, "loss": 0.8353, "step": 7053, "task_loss": 0.5460407137870789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1096444129943848, "epoch": 5.96, "learning_rate": 2.2428853198083967e-05, "loss": 1.1322, "step": 7054, "task_loss": 1.2017309665679932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6467916965484619, "epoch": 5.96, "learning_rate": 2.2424157039541656e-05, "loss": 0.9962, "step": 7055, "task_loss": 0.4231715500354767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4409056305885315, "epoch": 5.96, "learning_rate": 2.2419460880999343e-05, "loss": 1.1548, "step": 7056, "task_loss": 0.20309779047966003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3258100152015686, "epoch": 5.96, "learning_rate": 2.2414764722457033e-05, "loss": 0.8573, "step": 7057, "task_loss": 0.6042032837867737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6109276413917542, "epoch": 5.97, "learning_rate": 2.241006856391472e-05, "loss": 0.9022, "step": 7058, "task_loss": 1.354566216468811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.361857295036316, "epoch": 5.97, "learning_rate": 2.240537240537241e-05, "loss": 1.0052, "step": 7059, "task_loss": 1.3243136405944824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5689749717712402, "epoch": 5.97, "learning_rate": 2.2400676246830095e-05, "loss": 0.6964, "step": 7060, "task_loss": 1.2574723958969116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0306953191757202, "epoch": 5.97, "learning_rate": 2.239598008828778e-05, "loss": 0.8826, "step": 7061, "task_loss": 1.0141798257827759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7826124429702759, "epoch": 5.97, "learning_rate": 2.2391283929745468e-05, "loss": 0.8073, "step": 7062, "task_loss": 0.5343944430351257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3529873192310333, "epoch": 5.97, "learning_rate": 2.2386587771203157e-05, "loss": 0.7151, "step": 7063, "task_loss": 0.6386767029762268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9296773076057434, "epoch": 5.97, "learning_rate": 2.2381891612660844e-05, "loss": 0.6858, "step": 7064, "task_loss": 0.9052790403366089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6671645045280457, "epoch": 5.97, "learning_rate": 2.2377195454118533e-05, "loss": 0.8071, "step": 7065, "task_loss": 1.0239019393920898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6642403602600098, "epoch": 5.97, "learning_rate": 2.237249929557622e-05, "loss": 0.896, "step": 7066, "task_loss": 0.23392505943775177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5275527238845825, "epoch": 5.97, "learning_rate": 2.2367803137033906e-05, "loss": 0.745, "step": 7067, "task_loss": 1.2327420711517334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2827645540237427, "epoch": 5.97, "learning_rate": 2.2363106978491592e-05, "loss": 0.7879, "step": 7068, "task_loss": 1.3057681322097778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6249422430992126, "epoch": 5.97, "learning_rate": 2.2358410819949282e-05, "loss": 0.8314, "step": 7069, "task_loss": 1.0320022106170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4094274342060089, "epoch": 5.98, "learning_rate": 2.2353714661406972e-05, "loss": 0.714, "step": 7070, "task_loss": 0.516326904296875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4723914861679077, "epoch": 5.98, "learning_rate": 2.2349018502864658e-05, "loss": 1.1309, "step": 7071, "task_loss": 2.106091022491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6771541237831116, "epoch": 5.98, "learning_rate": 2.2344322344322348e-05, "loss": 0.7769, "step": 7072, "task_loss": 1.31974458694458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7384321689605713, "epoch": 5.98, "learning_rate": 2.2339626185780034e-05, "loss": 0.815, "step": 7073, "task_loss": 0.6828835606575012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5445557832717896, "epoch": 5.98, "learning_rate": 2.233493002723772e-05, "loss": 0.8728, "step": 7074, "task_loss": 0.3883567154407501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5435608625411987, "epoch": 5.98, "learning_rate": 2.2330233868695407e-05, "loss": 0.8916, "step": 7075, "task_loss": 0.2512663006782532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9800277352333069, "epoch": 5.98, "learning_rate": 2.2325537710153097e-05, "loss": 0.8062, "step": 7076, "task_loss": 0.7395995855331421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5906433463096619, "epoch": 5.98, "learning_rate": 2.2320841551610783e-05, "loss": 0.7683, "step": 7077, "task_loss": 0.2557357847690582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4619249999523163, "epoch": 5.98, "learning_rate": 2.2316145393068473e-05, "loss": 0.6106, "step": 7078, "task_loss": 0.2611728310585022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9804410934448242, "epoch": 5.98, "learning_rate": 2.231144923452616e-05, "loss": 0.7374, "step": 7079, "task_loss": 1.9585700035095215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44385623931884766, "epoch": 5.98, "learning_rate": 2.2306753075983845e-05, "loss": 0.8936, "step": 7080, "task_loss": 0.861582338809967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5354273319244385, "epoch": 5.99, "learning_rate": 2.2302056917441532e-05, "loss": 1.1443, "step": 7081, "task_loss": 0.8242999315261841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5485125780105591, "epoch": 5.99, "learning_rate": 2.229736075889922e-05, "loss": 0.7053, "step": 7082, "task_loss": 1.0939903259277344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5410581827163696, "epoch": 5.99, "learning_rate": 2.2292664600356908e-05, "loss": 0.9531, "step": 7083, "task_loss": 0.7201462984085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7585351467132568, "epoch": 5.99, "learning_rate": 2.2287968441814598e-05, "loss": 0.9586, "step": 7084, "task_loss": 0.8335826992988586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6598226428031921, "epoch": 5.99, "learning_rate": 2.2283272283272287e-05, "loss": 0.6252, "step": 7085, "task_loss": 1.2345012426376343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8331909775733948, "epoch": 5.99, "learning_rate": 2.227857612472997e-05, "loss": 0.9825, "step": 7086, "task_loss": 0.13560684025287628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.741552472114563, "epoch": 5.99, "learning_rate": 2.227387996618766e-05, "loss": 0.776, "step": 7087, "task_loss": 0.9646559357643127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.870415210723877, "epoch": 5.99, "learning_rate": 2.2269183807645346e-05, "loss": 0.6758, "step": 7088, "task_loss": 0.9939883351325989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.173767328262329, "epoch": 5.99, "learning_rate": 2.2264487649103036e-05, "loss": 0.8725, "step": 7089, "task_loss": 1.956870198249817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6443148851394653, "epoch": 5.99, "learning_rate": 2.2259791490560722e-05, "loss": 0.5856, "step": 7090, "task_loss": 0.2053501009941101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6967878341674805, "epoch": 5.99, "learning_rate": 2.2255095332018412e-05, "loss": 0.8258, "step": 7091, "task_loss": 1.0281916856765747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4606030285358429, "epoch": 5.99, "learning_rate": 2.22503991734761e-05, "loss": 0.613, "step": 7092, "task_loss": 1.3306766748428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9267438650131226, "epoch": 6.0, "learning_rate": 2.2245703014933785e-05, "loss": 0.917, "step": 7093, "task_loss": 0.40084147453308105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4064692258834839, "epoch": 6.0, "learning_rate": 2.224100685639147e-05, "loss": 0.7798, "step": 7094, "task_loss": 1.770411729812622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6947298049926758, "epoch": 6.0, "learning_rate": 2.223631069784916e-05, "loss": 0.9485, "step": 7095, "task_loss": 0.7651915550231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5776241421699524, "epoch": 6.0, "learning_rate": 2.2231614539306847e-05, "loss": 0.6901, "step": 7096, "task_loss": 1.2957662343978882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45729881525039673, "epoch": 6.0, "learning_rate": 2.2226918380764537e-05, "loss": 0.7849, "step": 7097, "task_loss": 0.5133386254310608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.386456847190857, "epoch": 6.0, "learning_rate": 2.2222222222222223e-05, "loss": 1.299, "step": 7098, "task_loss": 2.191117763519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6390501856803894, "epoch": 6.0, "learning_rate": 2.221752606367991e-05, "loss": 1.1919, "step": 7099, "task_loss": 0.3939107358455658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6627697944641113, "epoch": 6.0, "learning_rate": 2.22128299051376e-05, "loss": 0.6886, "step": 7100, "task_loss": 0.6928160190582275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8601105213165283, "epoch": 6.0, "learning_rate": 2.2208133746595286e-05, "loss": 0.6416, "step": 7101, "task_loss": 1.2117775678634644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5194589495658875, "epoch": 6.0, "learning_rate": 2.2203437588052975e-05, "loss": 0.6364, "step": 7102, "task_loss": 0.22199073433876038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7150112390518188, "epoch": 6.0, "learning_rate": 2.2198741429510662e-05, "loss": 0.7656, "step": 7103, "task_loss": 0.6765703558921814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9991054534912109, "epoch": 6.01, "learning_rate": 2.219404527096835e-05, "loss": 1.0037, "step": 7104, "task_loss": 1.0277425050735474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5912360548973083, "epoch": 6.01, "learning_rate": 2.2189349112426034e-05, "loss": 0.6608, "step": 7105, "task_loss": 0.11691843718290329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6437273025512695, "epoch": 6.01, "learning_rate": 2.2184652953883724e-05, "loss": 0.6963, "step": 7106, "task_loss": 1.0331377983093262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0153918266296387, "epoch": 6.01, "learning_rate": 2.217995679534141e-05, "loss": 0.8166, "step": 7107, "task_loss": 0.7966892123222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8375142812728882, "epoch": 6.01, "learning_rate": 2.21752606367991e-05, "loss": 0.6004, "step": 7108, "task_loss": 0.6863961219787598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8782296180725098, "epoch": 6.01, "learning_rate": 2.2170564478256787e-05, "loss": 0.6612, "step": 7109, "task_loss": 0.9410098195075989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.000123381614685, "epoch": 6.01, "learning_rate": 2.2165868319714476e-05, "loss": 0.7366, "step": 7110, "task_loss": 0.23385050892829895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45928698778152466, "epoch": 6.01, "learning_rate": 2.2161172161172163e-05, "loss": 1.0571, "step": 7111, "task_loss": 0.5523022413253784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8624265789985657, "epoch": 6.01, "learning_rate": 2.215647600262985e-05, "loss": 0.926, "step": 7112, "task_loss": 0.9815535545349121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0639142990112305, "epoch": 6.01, "learning_rate": 2.2151779844087535e-05, "loss": 0.8714, "step": 7113, "task_loss": 1.8417421579360962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4660433530807495, "epoch": 6.01, "learning_rate": 2.2147083685545225e-05, "loss": 0.5995, "step": 7114, "task_loss": 0.21918468177318573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8590984344482422, "epoch": 6.01, "learning_rate": 2.214238752700291e-05, "loss": 0.8607, "step": 7115, "task_loss": 0.9001765251159668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.562325119972229, "epoch": 6.02, "learning_rate": 2.21376913684606e-05, "loss": 0.8505, "step": 7116, "task_loss": 0.2037677764892578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7289694547653198, "epoch": 6.02, "learning_rate": 2.2132995209918287e-05, "loss": 0.537, "step": 7117, "task_loss": 0.5974984169006348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6921584606170654, "epoch": 6.02, "learning_rate": 2.2128299051375974e-05, "loss": 0.9813, "step": 7118, "task_loss": 0.39593058824539185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.106163501739502, "epoch": 6.02, "learning_rate": 2.2123602892833664e-05, "loss": 0.8433, "step": 7119, "task_loss": 1.7666001319885254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47236526012420654, "epoch": 6.02, "learning_rate": 2.211890673429135e-05, "loss": 0.7359, "step": 7120, "task_loss": 0.926882803440094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6657286286354065, "epoch": 6.02, "learning_rate": 2.211421057574904e-05, "loss": 0.7717, "step": 7121, "task_loss": 0.5081996917724609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3894999325275421, "epoch": 6.02, "learning_rate": 2.2109514417206726e-05, "loss": 0.5834, "step": 7122, "task_loss": 0.3828360438346863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8959948420524597, "epoch": 6.02, "learning_rate": 2.2104818258664416e-05, "loss": 0.8048, "step": 7123, "task_loss": 0.4054882526397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7708507776260376, "epoch": 6.02, "learning_rate": 2.2100122100122102e-05, "loss": 0.8183, "step": 7124, "task_loss": 0.6130298376083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8726853132247925, "epoch": 6.02, "learning_rate": 2.209542594157979e-05, "loss": 0.8971, "step": 7125, "task_loss": 1.4283827543258667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8115596771240234, "epoch": 6.02, "learning_rate": 2.2090729783037475e-05, "loss": 0.6379, "step": 7126, "task_loss": 1.21366548538208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8639369606971741, "epoch": 6.02, "learning_rate": 2.2086033624495164e-05, "loss": 0.8581, "step": 7127, "task_loss": 0.404360294342041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7997710108757019, "epoch": 6.03, "learning_rate": 2.208133746595285e-05, "loss": 0.8458, "step": 7128, "task_loss": 0.9138704538345337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9504649639129639, "epoch": 6.03, "learning_rate": 2.207664130741054e-05, "loss": 1.007, "step": 7129, "task_loss": 0.8732120394706726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4121496081352234, "epoch": 6.03, "learning_rate": 2.2071945148868227e-05, "loss": 0.6825, "step": 7130, "task_loss": 0.6081313490867615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8991836905479431, "epoch": 6.03, "learning_rate": 2.2067248990325913e-05, "loss": 1.0073, "step": 7131, "task_loss": 0.5454006195068359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8151270151138306, "epoch": 6.03, "learning_rate": 2.2062552831783603e-05, "loss": 0.6465, "step": 7132, "task_loss": 0.3109200894832611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5840371251106262, "epoch": 6.03, "learning_rate": 2.205785667324129e-05, "loss": 0.9447, "step": 7133, "task_loss": 0.5079665780067444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8900152444839478, "epoch": 6.03, "learning_rate": 2.205316051469898e-05, "loss": 0.7098, "step": 7134, "task_loss": 0.6795893311500549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1221367120742798, "epoch": 6.03, "learning_rate": 2.2048464356156665e-05, "loss": 0.7312, "step": 7135, "task_loss": 1.6909128427505493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6931196451187134, "epoch": 6.03, "learning_rate": 2.2043768197614355e-05, "loss": 0.8441, "step": 7136, "task_loss": 0.2701663374900818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.928598940372467, "epoch": 6.03, "learning_rate": 2.2039072039072038e-05, "loss": 0.8624, "step": 7137, "task_loss": 0.7406966090202332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8801091313362122, "epoch": 6.03, "learning_rate": 2.2034375880529728e-05, "loss": 0.8929, "step": 7138, "task_loss": 1.1174722909927368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3936173915863037, "epoch": 6.03, "learning_rate": 2.2029679721987414e-05, "loss": 0.619, "step": 7139, "task_loss": 0.8902227282524109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9734834432601929, "epoch": 6.04, "learning_rate": 2.2024983563445104e-05, "loss": 0.9808, "step": 7140, "task_loss": 0.4058310091495514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.691877007484436, "epoch": 6.04, "learning_rate": 2.202028740490279e-05, "loss": 1.0855, "step": 7141, "task_loss": 0.5366085767745972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5148212313652039, "epoch": 6.04, "learning_rate": 2.201559124636048e-05, "loss": 0.7127, "step": 7142, "task_loss": 1.0470640659332275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8280607461929321, "epoch": 6.04, "learning_rate": 2.2010895087818166e-05, "loss": 0.7594, "step": 7143, "task_loss": 0.3258991837501526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6546841859817505, "epoch": 6.04, "learning_rate": 2.2006198929275853e-05, "loss": 0.6847, "step": 7144, "task_loss": 0.6249213814735413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7568897604942322, "epoch": 6.04, "learning_rate": 2.200150277073354e-05, "loss": 0.7307, "step": 7145, "task_loss": 1.6866800785064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8285806179046631, "epoch": 6.04, "learning_rate": 2.199680661219123e-05, "loss": 0.968, "step": 7146, "task_loss": 0.6400769948959351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7584047317504883, "epoch": 6.04, "learning_rate": 2.199211045364892e-05, "loss": 0.8086, "step": 7147, "task_loss": 1.0880722999572754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.995105504989624, "epoch": 6.04, "learning_rate": 2.1987414295106605e-05, "loss": 0.9051, "step": 7148, "task_loss": 0.9903795719146729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8639352321624756, "epoch": 6.04, "learning_rate": 2.198271813656429e-05, "loss": 0.7182, "step": 7149, "task_loss": 0.5534251928329468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9292064905166626, "epoch": 6.04, "learning_rate": 2.1978021978021977e-05, "loss": 0.955, "step": 7150, "task_loss": 0.7734515070915222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5083187222480774, "epoch": 6.04, "learning_rate": 2.1973325819479667e-05, "loss": 0.5964, "step": 7151, "task_loss": 0.4704549014568329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7466825246810913, "epoch": 6.05, "learning_rate": 2.1968629660937353e-05, "loss": 0.7319, "step": 7152, "task_loss": 0.7857828140258789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3125327229499817, "epoch": 6.05, "learning_rate": 2.1963933502395043e-05, "loss": 0.7296, "step": 7153, "task_loss": 0.2842787802219391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4295663833618164, "epoch": 6.05, "learning_rate": 2.195923734385273e-05, "loss": 0.9519, "step": 7154, "task_loss": 0.88944411277771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9978153705596924, "epoch": 6.05, "learning_rate": 2.195454118531042e-05, "loss": 0.9189, "step": 7155, "task_loss": 0.9452162384986877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0299227237701416, "epoch": 6.05, "learning_rate": 2.1949845026768106e-05, "loss": 0.8957, "step": 7156, "task_loss": 1.1841872930526733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36756280064582825, "epoch": 6.05, "learning_rate": 2.1945148868225792e-05, "loss": 0.7207, "step": 7157, "task_loss": 0.17631644010543823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5313814878463745, "epoch": 6.05, "learning_rate": 2.1940452709683478e-05, "loss": 0.6508, "step": 7158, "task_loss": 0.35032200813293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41942355036735535, "epoch": 6.05, "learning_rate": 2.1935756551141168e-05, "loss": 0.6366, "step": 7159, "task_loss": 0.1456775963306427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7236137390136719, "epoch": 6.05, "learning_rate": 2.1931060392598854e-05, "loss": 0.6844, "step": 7160, "task_loss": 0.9470515251159668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7860940098762512, "epoch": 6.05, "learning_rate": 2.1926364234056544e-05, "loss": 0.8925, "step": 7161, "task_loss": 1.6932581663131714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8790786862373352, "epoch": 6.05, "learning_rate": 2.192166807551423e-05, "loss": 0.8136, "step": 7162, "task_loss": 0.7614015340805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8930323719978333, "epoch": 6.05, "learning_rate": 2.1916971916971917e-05, "loss": 0.6996, "step": 7163, "task_loss": 0.6971017122268677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5683193802833557, "epoch": 6.06, "learning_rate": 2.1912275758429606e-05, "loss": 0.6169, "step": 7164, "task_loss": 0.2191290706396103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.912101149559021, "epoch": 6.06, "learning_rate": 2.1907579599887293e-05, "loss": 0.7586, "step": 7165, "task_loss": 0.6015219688415527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44284698367118835, "epoch": 6.06, "learning_rate": 2.1902883441344982e-05, "loss": 0.6089, "step": 7166, "task_loss": 0.509106457233429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4534986615180969, "epoch": 6.06, "learning_rate": 2.189818728280267e-05, "loss": 0.588, "step": 7167, "task_loss": 0.43487006425857544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5553808212280273, "epoch": 6.06, "learning_rate": 2.189349112426036e-05, "loss": 0.7025, "step": 7168, "task_loss": 0.0815938264131546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0130130052566528, "epoch": 6.06, "learning_rate": 2.188879496571804e-05, "loss": 0.7629, "step": 7169, "task_loss": 1.0839078426361084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8276294469833374, "epoch": 6.06, "learning_rate": 2.188409880717573e-05, "loss": 0.6815, "step": 7170, "task_loss": 0.7538753747940063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7806955575942993, "epoch": 6.06, "learning_rate": 2.1879402648633418e-05, "loss": 0.6866, "step": 7171, "task_loss": 1.2768598794937134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9060390591621399, "epoch": 6.06, "learning_rate": 2.1874706490091107e-05, "loss": 0.7436, "step": 7172, "task_loss": 0.5563049912452698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1874988079071045, "epoch": 6.06, "learning_rate": 2.1870010331548794e-05, "loss": 0.9611, "step": 7173, "task_loss": 1.6944698095321655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6625818610191345, "epoch": 6.06, "learning_rate": 2.1865314173006483e-05, "loss": 0.9391, "step": 7174, "task_loss": 1.0166760683059692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8671120405197144, "epoch": 6.07, "learning_rate": 2.186061801446417e-05, "loss": 0.6755, "step": 7175, "task_loss": 0.7961761951446533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8096573352813721, "epoch": 6.07, "learning_rate": 2.1855921855921856e-05, "loss": 0.8145, "step": 7176, "task_loss": 0.5377973914146423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7617976069450378, "epoch": 6.07, "learning_rate": 2.1851225697379546e-05, "loss": 0.8567, "step": 7177, "task_loss": 0.30853936076164246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5664557814598083, "epoch": 6.07, "learning_rate": 2.1846529538837232e-05, "loss": 0.7805, "step": 7178, "task_loss": 0.765559732913971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5551755428314209, "epoch": 6.07, "learning_rate": 2.1841833380294922e-05, "loss": 0.7457, "step": 7179, "task_loss": 0.9045150876045227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1659348011016846, "epoch": 6.07, "learning_rate": 2.1837137221752608e-05, "loss": 0.8006, "step": 7180, "task_loss": 1.2454986572265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6403713822364807, "epoch": 6.07, "learning_rate": 2.1832441063210295e-05, "loss": 0.854, "step": 7181, "task_loss": 1.0918017625808716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.923053503036499, "epoch": 6.07, "learning_rate": 2.182774490466798e-05, "loss": 0.641, "step": 7182, "task_loss": 0.8203163743019104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5141291618347168, "epoch": 6.07, "learning_rate": 2.182304874612567e-05, "loss": 0.787, "step": 7183, "task_loss": 0.2888227105140686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4905552864074707, "epoch": 6.07, "learning_rate": 2.1818352587583357e-05, "loss": 0.8524, "step": 7184, "task_loss": 0.0369882732629776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7877859473228455, "epoch": 6.07, "learning_rate": 2.1813656429041047e-05, "loss": 0.8265, "step": 7185, "task_loss": 0.45535343885421753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1110153198242188, "epoch": 6.07, "learning_rate": 2.1808960270498733e-05, "loss": 0.9007, "step": 7186, "task_loss": 1.2666420936584473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6539536714553833, "epoch": 6.08, "learning_rate": 2.1804264111956423e-05, "loss": 0.6891, "step": 7187, "task_loss": 0.8341125249862671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9959495067596436, "epoch": 6.08, "learning_rate": 2.1799567953414106e-05, "loss": 0.7988, "step": 7188, "task_loss": 1.7797715663909912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7082169055938721, "epoch": 6.08, "learning_rate": 2.1794871794871795e-05, "loss": 0.6971, "step": 7189, "task_loss": 1.2113895416259766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9335125684738159, "epoch": 6.08, "learning_rate": 2.1790175636329482e-05, "loss": 0.7707, "step": 7190, "task_loss": 0.8765017986297607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6593595743179321, "epoch": 6.08, "learning_rate": 2.178547947778717e-05, "loss": 0.5838, "step": 7191, "task_loss": 0.8139443397521973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.463971734046936, "epoch": 6.08, "learning_rate": 2.1780783319244858e-05, "loss": 0.9903, "step": 7192, "task_loss": 1.496485948562622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7922533750534058, "epoch": 6.08, "learning_rate": 2.1776087160702548e-05, "loss": 0.7529, "step": 7193, "task_loss": 0.5306128263473511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7040068507194519, "epoch": 6.08, "learning_rate": 2.1771391002160234e-05, "loss": 0.7191, "step": 7194, "task_loss": 0.3697124123573303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8639769554138184, "epoch": 6.08, "learning_rate": 2.176669484361792e-05, "loss": 0.591, "step": 7195, "task_loss": 0.6479305028915405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5744210481643677, "epoch": 6.08, "learning_rate": 2.176199868507561e-05, "loss": 0.8295, "step": 7196, "task_loss": 0.7247838377952576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4449467658996582, "epoch": 6.08, "learning_rate": 2.1757302526533296e-05, "loss": 0.6335, "step": 7197, "task_loss": 0.2274782508611679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.54071044921875, "epoch": 6.08, "learning_rate": 2.1752606367990986e-05, "loss": 0.7804, "step": 7198, "task_loss": 0.9290853142738342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4709904193878174, "epoch": 6.09, "learning_rate": 2.1747910209448672e-05, "loss": 0.5373, "step": 7199, "task_loss": 0.7452442049980164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45014262199401855, "epoch": 6.09, "learning_rate": 2.174321405090636e-05, "loss": 0.5995, "step": 7200, "task_loss": 0.3879086673259735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7233297824859619, "epoch": 6.09, "learning_rate": 2.1738517892364045e-05, "loss": 0.9483, "step": 7201, "task_loss": 0.8628402352333069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49130895733833313, "epoch": 6.09, "learning_rate": 2.1733821733821735e-05, "loss": 0.654, "step": 7202, "task_loss": 0.6242318153381348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5314991474151611, "epoch": 6.09, "learning_rate": 2.172912557527942e-05, "loss": 0.6873, "step": 7203, "task_loss": 0.8942230939865112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7817369699478149, "epoch": 6.09, "learning_rate": 2.172442941673711e-05, "loss": 0.9124, "step": 7204, "task_loss": 1.788404107093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3676624000072479, "epoch": 6.09, "learning_rate": 2.1719733258194797e-05, "loss": 0.7821, "step": 7205, "task_loss": 0.26986026763916016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0994385480880737, "epoch": 6.09, "learning_rate": 2.1715037099652487e-05, "loss": 0.9143, "step": 7206, "task_loss": 1.2674267292022705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.691881537437439, "epoch": 6.09, "learning_rate": 2.1710340941110173e-05, "loss": 0.763, "step": 7207, "task_loss": 0.39819008111953735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5348970293998718, "epoch": 6.09, "learning_rate": 2.170564478256786e-05, "loss": 0.7228, "step": 7208, "task_loss": 1.1176670789718628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49676042795181274, "epoch": 6.09, "learning_rate": 2.170094862402555e-05, "loss": 0.7046, "step": 7209, "task_loss": 0.7298954129219055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48109763860702515, "epoch": 6.09, "learning_rate": 2.1696252465483236e-05, "loss": 0.6479, "step": 7210, "task_loss": 0.7078199982643127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2839406728744507, "epoch": 6.1, "learning_rate": 2.1691556306940925e-05, "loss": 0.8161, "step": 7211, "task_loss": 2.052953004837036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5552632212638855, "epoch": 6.1, "learning_rate": 2.1686860148398612e-05, "loss": 0.6088, "step": 7212, "task_loss": 0.7348389625549316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1241247653961182, "epoch": 6.1, "learning_rate": 2.1682163989856298e-05, "loss": 0.6923, "step": 7213, "task_loss": 0.7685191631317139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8443110585212708, "epoch": 6.1, "learning_rate": 2.1677467831313984e-05, "loss": 0.7212, "step": 7214, "task_loss": 0.8456252813339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2470649480819702, "epoch": 6.1, "learning_rate": 2.1672771672771674e-05, "loss": 0.895, "step": 7215, "task_loss": 1.8955786228179932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8324220180511475, "epoch": 6.1, "learning_rate": 2.166807551422936e-05, "loss": 0.9036, "step": 7216, "task_loss": 1.3671795129776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.144232988357544, "epoch": 6.1, "learning_rate": 2.166337935568705e-05, "loss": 1.0032, "step": 7217, "task_loss": 1.3315924406051636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9766315221786499, "epoch": 6.1, "learning_rate": 2.1658683197144737e-05, "loss": 0.6877, "step": 7218, "task_loss": 2.1666088104248047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5331373810768127, "epoch": 6.1, "learning_rate": 2.1653987038602426e-05, "loss": 0.6373, "step": 7219, "task_loss": 0.5655952095985413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6576696038246155, "epoch": 6.1, "learning_rate": 2.164929088006011e-05, "loss": 0.7624, "step": 7220, "task_loss": 0.10973702371120453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.284016489982605, "epoch": 6.1, "learning_rate": 2.16445947215178e-05, "loss": 0.7635, "step": 7221, "task_loss": 1.2200535535812378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.049090027809143, "epoch": 6.1, "learning_rate": 2.1639898562975485e-05, "loss": 0.8179, "step": 7222, "task_loss": 1.1400340795516968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5775550007820129, "epoch": 6.11, "learning_rate": 2.1635202404433175e-05, "loss": 0.7803, "step": 7223, "task_loss": 0.43651458621025085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8339256644248962, "epoch": 6.11, "learning_rate": 2.1630506245890865e-05, "loss": 0.7811, "step": 7224, "task_loss": 1.4608840942382812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8407207727432251, "epoch": 6.11, "learning_rate": 2.162581008734855e-05, "loss": 0.6794, "step": 7225, "task_loss": 1.6398729085922241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8818645477294922, "epoch": 6.11, "learning_rate": 2.1621113928806237e-05, "loss": 0.9659, "step": 7226, "task_loss": 1.0153367519378662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4996143579483032, "epoch": 6.11, "learning_rate": 2.1616417770263924e-05, "loss": 0.7219, "step": 7227, "task_loss": 0.9172160029411316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8310619592666626, "epoch": 6.11, "learning_rate": 2.1611721611721613e-05, "loss": 0.7772, "step": 7228, "task_loss": 0.7252551913261414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0883442163467407, "epoch": 6.11, "learning_rate": 2.16070254531793e-05, "loss": 0.7137, "step": 7229, "task_loss": 1.3826100826263428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6569727659225464, "epoch": 6.11, "learning_rate": 2.160232929463699e-05, "loss": 0.7403, "step": 7230, "task_loss": 0.6847851276397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7464781403541565, "epoch": 6.11, "learning_rate": 2.1597633136094676e-05, "loss": 0.6631, "step": 7231, "task_loss": 0.5010188221931458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5325461626052856, "epoch": 6.11, "learning_rate": 2.1592936977552362e-05, "loss": 0.7047, "step": 7232, "task_loss": 0.42449143528938293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6557865738868713, "epoch": 6.11, "learning_rate": 2.158824081901005e-05, "loss": 0.7893, "step": 7233, "task_loss": 0.8632277846336365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48429661989212036, "epoch": 6.11, "learning_rate": 2.1583544660467738e-05, "loss": 0.8502, "step": 7234, "task_loss": 0.5889290571212769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6553199291229248, "epoch": 6.12, "learning_rate": 2.1578848501925425e-05, "loss": 0.7382, "step": 7235, "task_loss": 0.6046043634414673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6320979595184326, "epoch": 6.12, "learning_rate": 2.1574152343383114e-05, "loss": 0.6423, "step": 7236, "task_loss": 0.4473952651023865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9393744468688965, "epoch": 6.12, "learning_rate": 2.15694561848408e-05, "loss": 0.9265, "step": 7237, "task_loss": 1.5825186967849731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7281248569488525, "epoch": 6.12, "learning_rate": 2.156476002629849e-05, "loss": 0.557, "step": 7238, "task_loss": 0.7661282420158386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7714754343032837, "epoch": 6.12, "learning_rate": 2.1560063867756177e-05, "loss": 0.6167, "step": 7239, "task_loss": 1.1470433473587036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1284431219100952, "epoch": 6.12, "learning_rate": 2.1555367709213863e-05, "loss": 0.8751, "step": 7240, "task_loss": 0.5520270466804504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6003793478012085, "epoch": 6.12, "learning_rate": 2.1550671550671553e-05, "loss": 0.7384, "step": 7241, "task_loss": 0.5755481719970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6661005020141602, "epoch": 6.12, "learning_rate": 2.154597539212924e-05, "loss": 0.661, "step": 7242, "task_loss": 0.8687528967857361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4307226836681366, "epoch": 6.12, "learning_rate": 2.154127923358693e-05, "loss": 0.7535, "step": 7243, "task_loss": 0.20820392668247223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.622411847114563, "epoch": 6.12, "learning_rate": 2.1536583075044615e-05, "loss": 0.836, "step": 7244, "task_loss": 0.7981469631195068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0739017724990845, "epoch": 6.12, "learning_rate": 2.15318869165023e-05, "loss": 0.7901, "step": 7245, "task_loss": 1.231209635734558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7040863037109375, "epoch": 6.13, "learning_rate": 2.1527190757959988e-05, "loss": 0.7473, "step": 7246, "task_loss": 1.5687010288238525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4498690664768219, "epoch": 6.13, "learning_rate": 2.1522494599417678e-05, "loss": 0.6428, "step": 7247, "task_loss": 0.970761775970459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8253179788589478, "epoch": 6.13, "learning_rate": 2.1517798440875364e-05, "loss": 0.733, "step": 7248, "task_loss": 1.0842448472976685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7498760223388672, "epoch": 6.13, "learning_rate": 2.1513102282333054e-05, "loss": 0.5574, "step": 7249, "task_loss": 0.3938294053077698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.175424337387085, "epoch": 6.13, "learning_rate": 2.150840612379074e-05, "loss": 0.8473, "step": 7250, "task_loss": 1.0221220254898071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5544967651367188, "epoch": 6.13, "learning_rate": 2.1503709965248426e-05, "loss": 0.7733, "step": 7251, "task_loss": 1.170464038848877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9272502660751343, "epoch": 6.13, "learning_rate": 2.1499013806706113e-05, "loss": 0.9199, "step": 7252, "task_loss": 1.1629717350006104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7568410634994507, "epoch": 6.13, "learning_rate": 2.1494317648163802e-05, "loss": 0.8159, "step": 7253, "task_loss": 0.8042982220649719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.870246410369873, "epoch": 6.13, "learning_rate": 2.1489621489621492e-05, "loss": 0.7979, "step": 7254, "task_loss": 0.5970286726951599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4697515368461609, "epoch": 6.13, "learning_rate": 2.148492533107918e-05, "loss": 0.5932, "step": 7255, "task_loss": 0.8992366790771484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5866495966911316, "epoch": 6.13, "learning_rate": 2.1480229172536868e-05, "loss": 0.6853, "step": 7256, "task_loss": 0.35000675916671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4676623344421387, "epoch": 6.13, "learning_rate": 2.1475533013994555e-05, "loss": 0.6981, "step": 7257, "task_loss": 0.8484413027763367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7227258682250977, "epoch": 6.14, "learning_rate": 2.147083685545224e-05, "loss": 0.9086, "step": 7258, "task_loss": 0.9163452386856079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5786780714988708, "epoch": 6.14, "learning_rate": 2.1466140696909927e-05, "loss": 0.8879, "step": 7259, "task_loss": 0.8702569007873535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8037509918212891, "epoch": 6.14, "learning_rate": 2.1461444538367617e-05, "loss": 0.7396, "step": 7260, "task_loss": 1.1878986358642578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9788140058517456, "epoch": 6.14, "learning_rate": 2.1456748379825303e-05, "loss": 0.8149, "step": 7261, "task_loss": 0.8414046764373779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6259075403213501, "epoch": 6.14, "learning_rate": 2.1452052221282993e-05, "loss": 0.5798, "step": 7262, "task_loss": 0.10041853040456772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5766868591308594, "epoch": 6.14, "learning_rate": 2.144735606274068e-05, "loss": 0.7063, "step": 7263, "task_loss": 0.9152083992958069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3962056636810303, "epoch": 6.14, "learning_rate": 2.1442659904198366e-05, "loss": 0.7388, "step": 7264, "task_loss": 0.8943689465522766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7610682845115662, "epoch": 6.14, "learning_rate": 2.1437963745656052e-05, "loss": 0.6581, "step": 7265, "task_loss": 1.3881255388259888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6718268990516663, "epoch": 6.14, "learning_rate": 2.1433267587113742e-05, "loss": 0.7665, "step": 7266, "task_loss": 0.7187629342079163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5800164341926575, "epoch": 6.14, "learning_rate": 2.1428571428571428e-05, "loss": 0.5272, "step": 7267, "task_loss": 1.1388016939163208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49932217597961426, "epoch": 6.14, "learning_rate": 2.1423875270029118e-05, "loss": 0.5922, "step": 7268, "task_loss": 1.0593395233154297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6561961770057678, "epoch": 6.14, "learning_rate": 2.1419179111486804e-05, "loss": 0.6985, "step": 7269, "task_loss": 0.5234431624412537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3892948627471924, "epoch": 6.15, "learning_rate": 2.1414482952944494e-05, "loss": 0.8931, "step": 7270, "task_loss": 0.09130711853504181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3358620405197144, "epoch": 6.15, "learning_rate": 2.140978679440218e-05, "loss": 0.9554, "step": 7271, "task_loss": 1.078927993774414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8309074640274048, "epoch": 6.15, "learning_rate": 2.1405090635859867e-05, "loss": 0.5986, "step": 7272, "task_loss": 0.8111335039138794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6570920944213867, "epoch": 6.15, "learning_rate": 2.1400394477317556e-05, "loss": 0.5594, "step": 7273, "task_loss": 0.4908803403377533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8020046949386597, "epoch": 6.15, "learning_rate": 2.1395698318775243e-05, "loss": 1.0126, "step": 7274, "task_loss": 1.1019777059555054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4352622032165527, "epoch": 6.15, "learning_rate": 2.1391002160232932e-05, "loss": 1.0363, "step": 7275, "task_loss": 2.709707021713257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7937697172164917, "epoch": 6.15, "learning_rate": 2.138630600169062e-05, "loss": 0.661, "step": 7276, "task_loss": 1.2615931034088135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6526200771331787, "epoch": 6.15, "learning_rate": 2.1381609843148305e-05, "loss": 0.6235, "step": 7277, "task_loss": 0.465181827545166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8773199319839478, "epoch": 6.15, "learning_rate": 2.137691368460599e-05, "loss": 0.5956, "step": 7278, "task_loss": 0.21639348566532135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.529338538646698, "epoch": 6.15, "learning_rate": 2.137221752606368e-05, "loss": 0.687, "step": 7279, "task_loss": 0.529032826423645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7948794960975647, "epoch": 6.15, "learning_rate": 2.1367521367521368e-05, "loss": 0.5625, "step": 7280, "task_loss": 1.1228216886520386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.729832649230957, "epoch": 6.15, "learning_rate": 2.1362825208979057e-05, "loss": 0.7571, "step": 7281, "task_loss": 0.3001360297203064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5609764456748962, "epoch": 6.16, "learning_rate": 2.1358129050436744e-05, "loss": 0.6762, "step": 7282, "task_loss": 0.37356650829315186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7173242568969727, "epoch": 6.16, "learning_rate": 2.135343289189443e-05, "loss": 0.8533, "step": 7283, "task_loss": 0.24563397467136383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6386001110076904, "epoch": 6.16, "learning_rate": 2.1348736733352116e-05, "loss": 0.6748, "step": 7284, "task_loss": 0.2699788808822632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4724279046058655, "epoch": 6.16, "learning_rate": 2.1344040574809806e-05, "loss": 0.5259, "step": 7285, "task_loss": 0.7830075621604919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2683372497558594, "epoch": 6.16, "learning_rate": 2.1339344416267496e-05, "loss": 0.8824, "step": 7286, "task_loss": 0.7397258281707764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1949256658554077, "epoch": 6.16, "learning_rate": 2.1334648257725182e-05, "loss": 0.8228, "step": 7287, "task_loss": 0.6241793036460876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9681227803230286, "epoch": 6.16, "learning_rate": 2.1329952099182872e-05, "loss": 0.7345, "step": 7288, "task_loss": 0.9855908751487732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1236743927001953, "epoch": 6.16, "learning_rate": 2.1325255940640558e-05, "loss": 1.0649, "step": 7289, "task_loss": 1.1838055849075317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9979900121688843, "epoch": 6.16, "learning_rate": 2.1320559782098244e-05, "loss": 0.6947, "step": 7290, "task_loss": 0.47318726778030396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5081163048744202, "epoch": 6.16, "learning_rate": 2.131586362355593e-05, "loss": 0.6806, "step": 7291, "task_loss": 0.3591505289077759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3657757639884949, "epoch": 6.16, "learning_rate": 2.131116746501362e-05, "loss": 0.784, "step": 7292, "task_loss": 0.43556782603263855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7799893617630005, "epoch": 6.16, "learning_rate": 2.1306471306471307e-05, "loss": 0.7097, "step": 7293, "task_loss": 1.025412917137146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44088101387023926, "epoch": 6.17, "learning_rate": 2.1301775147928997e-05, "loss": 0.5249, "step": 7294, "task_loss": 0.504116952419281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7500743865966797, "epoch": 6.17, "learning_rate": 2.1297078989386683e-05, "loss": 0.6988, "step": 7295, "task_loss": 0.3334808647632599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7884249687194824, "epoch": 6.17, "learning_rate": 2.129238283084437e-05, "loss": 0.6073, "step": 7296, "task_loss": 1.069042682647705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.269519329071045, "epoch": 6.17, "learning_rate": 2.1287686672302056e-05, "loss": 0.8782, "step": 7297, "task_loss": 0.8933669924736023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9798206090927124, "epoch": 6.17, "learning_rate": 2.1282990513759745e-05, "loss": 0.7817, "step": 7298, "task_loss": 0.8537862300872803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6239919066429138, "epoch": 6.17, "learning_rate": 2.127829435521743e-05, "loss": 0.5295, "step": 7299, "task_loss": 0.5681928992271423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6067286133766174, "epoch": 6.17, "learning_rate": 2.127359819667512e-05, "loss": 0.7828, "step": 7300, "task_loss": 0.4442169666290283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0570231676101685, "epoch": 6.17, "learning_rate": 2.126890203813281e-05, "loss": 0.8374, "step": 7301, "task_loss": 1.5841808319091797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4732641279697418, "epoch": 6.17, "learning_rate": 2.1264205879590497e-05, "loss": 0.5622, "step": 7302, "task_loss": 0.6028682589530945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6223129034042358, "epoch": 6.17, "learning_rate": 2.1259509721048184e-05, "loss": 0.7591, "step": 7303, "task_loss": 0.3884597420692444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.648101806640625, "epoch": 6.17, "learning_rate": 2.125481356250587e-05, "loss": 0.6647, "step": 7304, "task_loss": 0.08784683048725128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.96834796667099, "epoch": 6.17, "learning_rate": 2.125011740396356e-05, "loss": 0.9422, "step": 7305, "task_loss": 2.077296733856201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5976031422615051, "epoch": 6.18, "learning_rate": 2.1245421245421246e-05, "loss": 0.6508, "step": 7306, "task_loss": 0.2323848009109497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5921459197998047, "epoch": 6.18, "learning_rate": 2.1240725086878936e-05, "loss": 0.7395, "step": 7307, "task_loss": 0.8884536623954773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6686608195304871, "epoch": 6.18, "learning_rate": 2.1236028928336622e-05, "loss": 0.7932, "step": 7308, "task_loss": 1.4810248613357544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6463487148284912, "epoch": 6.18, "learning_rate": 2.123133276979431e-05, "loss": 0.8108, "step": 7309, "task_loss": 0.9177374839782715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5841728448867798, "epoch": 6.18, "learning_rate": 2.1226636611251995e-05, "loss": 0.7669, "step": 7310, "task_loss": 0.11132732778787613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.993439793586731, "epoch": 6.18, "learning_rate": 2.1221940452709685e-05, "loss": 0.932, "step": 7311, "task_loss": 1.1424192190170288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5201187133789062, "epoch": 6.18, "learning_rate": 2.121724429416737e-05, "loss": 0.8276, "step": 7312, "task_loss": 0.3919218182563782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6491808891296387, "epoch": 6.18, "learning_rate": 2.121254813562506e-05, "loss": 0.8441, "step": 7313, "task_loss": 0.7298765182495117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45480260252952576, "epoch": 6.18, "learning_rate": 2.1207851977082747e-05, "loss": 0.6396, "step": 7314, "task_loss": 0.30717793107032776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1135053634643555, "epoch": 6.18, "learning_rate": 2.1203155818540433e-05, "loss": 0.7115, "step": 7315, "task_loss": 1.473063588142395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48425841331481934, "epoch": 6.18, "learning_rate": 2.1198459659998123e-05, "loss": 0.6315, "step": 7316, "task_loss": 0.8654075264930725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6089150905609131, "epoch": 6.19, "learning_rate": 2.119376350145581e-05, "loss": 0.6474, "step": 7317, "task_loss": 1.0710158348083496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47333407402038574, "epoch": 6.19, "learning_rate": 2.11890673429135e-05, "loss": 0.7143, "step": 7318, "task_loss": 1.0157874822616577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0548293590545654, "epoch": 6.19, "learning_rate": 2.1184371184371186e-05, "loss": 0.724, "step": 7319, "task_loss": 0.7107288837432861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8705398440361023, "epoch": 6.19, "learning_rate": 2.1179675025828875e-05, "loss": 0.7251, "step": 7320, "task_loss": 1.6191306114196777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8949341177940369, "epoch": 6.19, "learning_rate": 2.117497886728656e-05, "loss": 0.8921, "step": 7321, "task_loss": 1.4676146507263184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.052672028541565, "epoch": 6.19, "learning_rate": 2.1170282708744248e-05, "loss": 0.8811, "step": 7322, "task_loss": 1.4106152057647705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5523642301559448, "epoch": 6.19, "learning_rate": 2.1165586550201934e-05, "loss": 0.7032, "step": 7323, "task_loss": 0.5388340353965759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9741703867912292, "epoch": 6.19, "learning_rate": 2.1160890391659624e-05, "loss": 0.7069, "step": 7324, "task_loss": 0.3141990005970001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8535448908805847, "epoch": 6.19, "learning_rate": 2.115619423311731e-05, "loss": 0.8445, "step": 7325, "task_loss": 1.0326857566833496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5568419694900513, "epoch": 6.19, "learning_rate": 2.1151498074575e-05, "loss": 0.6067, "step": 7326, "task_loss": 0.21833647787570953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5785733461380005, "epoch": 6.19, "learning_rate": 2.1146801916032686e-05, "loss": 0.6684, "step": 7327, "task_loss": 0.34020188450813293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4029943645000458, "epoch": 6.19, "learning_rate": 2.1142105757490373e-05, "loss": 0.7863, "step": 7328, "task_loss": 0.8310444951057434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6343764066696167, "epoch": 6.2, "learning_rate": 2.113740959894806e-05, "loss": 0.8018, "step": 7329, "task_loss": 0.33967268466949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8351682424545288, "epoch": 6.2, "learning_rate": 2.113271344040575e-05, "loss": 0.8613, "step": 7330, "task_loss": 0.5369423627853394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5718733072280884, "epoch": 6.2, "learning_rate": 2.112801728186344e-05, "loss": 0.8262, "step": 7331, "task_loss": 1.3266851902008057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9231122732162476, "epoch": 6.2, "learning_rate": 2.1123321123321125e-05, "loss": 0.7441, "step": 7332, "task_loss": 0.7563252449035645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8665092587471008, "epoch": 6.2, "learning_rate": 2.1118624964778815e-05, "loss": 0.8088, "step": 7333, "task_loss": 1.9261761903762817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.559299111366272, "epoch": 6.2, "learning_rate": 2.1113928806236498e-05, "loss": 0.7278, "step": 7334, "task_loss": 0.9001322388648987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4956600069999695, "epoch": 6.2, "learning_rate": 2.1109232647694187e-05, "loss": 0.7159, "step": 7335, "task_loss": 0.31479841470718384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.607801616191864, "epoch": 6.2, "learning_rate": 2.1104536489151874e-05, "loss": 0.5724, "step": 7336, "task_loss": 0.45360565185546875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6336132884025574, "epoch": 6.2, "learning_rate": 2.1099840330609563e-05, "loss": 0.6096, "step": 7337, "task_loss": 0.9167616963386536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6543516516685486, "epoch": 6.2, "learning_rate": 2.109514417206725e-05, "loss": 0.719, "step": 7338, "task_loss": 1.1112895011901855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5357809066772461, "epoch": 6.2, "learning_rate": 2.109044801352494e-05, "loss": 0.5898, "step": 7339, "task_loss": 0.3234281539916992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8484485745429993, "epoch": 6.2, "learning_rate": 2.1085751854982626e-05, "loss": 0.8597, "step": 7340, "task_loss": 0.7773064970970154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6508930325508118, "epoch": 6.21, "learning_rate": 2.1081055696440312e-05, "loss": 0.725, "step": 7341, "task_loss": 0.9968357682228088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5363518595695496, "epoch": 6.21, "learning_rate": 2.1076359537898e-05, "loss": 0.6697, "step": 7342, "task_loss": 1.040891170501709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39853590726852417, "epoch": 6.21, "learning_rate": 2.1071663379355688e-05, "loss": 0.7478, "step": 7343, "task_loss": 0.8653797507286072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5464482307434082, "epoch": 6.21, "learning_rate": 2.1066967220813375e-05, "loss": 0.7491, "step": 7344, "task_loss": 0.4205835163593292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8378185629844666, "epoch": 6.21, "learning_rate": 2.1062271062271064e-05, "loss": 0.6725, "step": 7345, "task_loss": 0.5495076179504395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.498736709356308, "epoch": 6.21, "learning_rate": 2.105757490372875e-05, "loss": 0.8028, "step": 7346, "task_loss": 0.9588196277618408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8683325052261353, "epoch": 6.21, "learning_rate": 2.1052878745186437e-05, "loss": 0.6272, "step": 7347, "task_loss": 1.1555695533752441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4476586580276489, "epoch": 6.21, "learning_rate": 2.1048182586644127e-05, "loss": 0.7327, "step": 7348, "task_loss": 0.4291970431804657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8535405397415161, "epoch": 6.21, "learning_rate": 2.1043486428101813e-05, "loss": 0.8593, "step": 7349, "task_loss": 2.5592613220214844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5869745016098022, "epoch": 6.21, "learning_rate": 2.1038790269559503e-05, "loss": 0.9324, "step": 7350, "task_loss": 0.6312212347984314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8216022849082947, "epoch": 6.21, "learning_rate": 2.103409411101719e-05, "loss": 0.7251, "step": 7351, "task_loss": 1.0759758949279785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0396323204040527, "epoch": 6.21, "learning_rate": 2.102939795247488e-05, "loss": 0.7799, "step": 7352, "task_loss": 1.042245864868164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4657673239707947, "epoch": 6.22, "learning_rate": 2.1024701793932565e-05, "loss": 0.7069, "step": 7353, "task_loss": 1.4343783855438232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5700223445892334, "epoch": 6.22, "learning_rate": 2.102000563539025e-05, "loss": 0.7688, "step": 7354, "task_loss": 0.8816137909889221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6840018033981323, "epoch": 6.22, "learning_rate": 2.1015309476847938e-05, "loss": 0.7185, "step": 7355, "task_loss": 0.567620575428009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7211248874664307, "epoch": 6.22, "learning_rate": 2.1010613318305628e-05, "loss": 0.7571, "step": 7356, "task_loss": 0.9964122772216797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6917285919189453, "epoch": 6.22, "learning_rate": 2.1005917159763314e-05, "loss": 0.6744, "step": 7357, "task_loss": 0.8713920712471008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9537568092346191, "epoch": 6.22, "learning_rate": 2.1001221001221004e-05, "loss": 0.851, "step": 7358, "task_loss": 0.4781615734100342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6019335985183716, "epoch": 6.22, "learning_rate": 2.099652484267869e-05, "loss": 0.5824, "step": 7359, "task_loss": 0.7115597128868103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.813585102558136, "epoch": 6.22, "learning_rate": 2.0991828684136376e-05, "loss": 0.6146, "step": 7360, "task_loss": 0.950802743434906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8140559792518616, "epoch": 6.22, "learning_rate": 2.0987132525594063e-05, "loss": 0.78, "step": 7361, "task_loss": 1.0086610317230225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7177630662918091, "epoch": 6.22, "learning_rate": 2.0982436367051752e-05, "loss": 0.7746, "step": 7362, "task_loss": 0.42572882771492004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7756826281547546, "epoch": 6.22, "learning_rate": 2.0977740208509442e-05, "loss": 0.7536, "step": 7363, "task_loss": 0.887277364730835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0004291534423828, "epoch": 6.22, "learning_rate": 2.097304404996713e-05, "loss": 0.8113, "step": 7364, "task_loss": 0.8581418395042419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5445433855056763, "epoch": 6.23, "learning_rate": 2.0968347891424818e-05, "loss": 0.6255, "step": 7365, "task_loss": 0.9477329254150391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8873623609542847, "epoch": 6.23, "learning_rate": 2.09636517328825e-05, "loss": 0.8642, "step": 7366, "task_loss": 0.3159730136394501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6767006516456604, "epoch": 6.23, "learning_rate": 2.095895557434019e-05, "loss": 0.6448, "step": 7367, "task_loss": 1.4321218729019165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5190590620040894, "epoch": 6.23, "learning_rate": 2.0954259415797877e-05, "loss": 0.9302, "step": 7368, "task_loss": 0.5675565600395203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6800102591514587, "epoch": 6.23, "learning_rate": 2.0949563257255567e-05, "loss": 0.6297, "step": 7369, "task_loss": 1.095626711845398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1350507736206055, "epoch": 6.23, "learning_rate": 2.0944867098713253e-05, "loss": 0.8015, "step": 7370, "task_loss": 1.9677834510803223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8702574372291565, "epoch": 6.23, "learning_rate": 2.0940170940170943e-05, "loss": 0.8522, "step": 7371, "task_loss": 0.4361218512058258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6808396577835083, "epoch": 6.23, "learning_rate": 2.093547478162863e-05, "loss": 0.7099, "step": 7372, "task_loss": 0.4927891492843628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9200088381767273, "epoch": 6.23, "learning_rate": 2.0930778623086316e-05, "loss": 0.8134, "step": 7373, "task_loss": 1.567285180091858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7614140510559082, "epoch": 6.23, "learning_rate": 2.0926082464544002e-05, "loss": 0.7354, "step": 7374, "task_loss": 0.9928926825523376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.598492443561554, "epoch": 6.23, "learning_rate": 2.0921386306001692e-05, "loss": 0.6284, "step": 7375, "task_loss": 0.37793588638305664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6020384430885315, "epoch": 6.23, "learning_rate": 2.0916690147459378e-05, "loss": 0.8495, "step": 7376, "task_loss": 1.2999933958053589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5425344705581665, "epoch": 6.24, "learning_rate": 2.0911993988917068e-05, "loss": 0.8302, "step": 7377, "task_loss": 0.4697367250919342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7062534689903259, "epoch": 6.24, "learning_rate": 2.0907297830374754e-05, "loss": 0.7016, "step": 7378, "task_loss": 1.0670864582061768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8800544142723083, "epoch": 6.24, "learning_rate": 2.090260167183244e-05, "loss": 0.736, "step": 7379, "task_loss": 1.084201693534851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6019288301467896, "epoch": 6.24, "learning_rate": 2.089790551329013e-05, "loss": 0.734, "step": 7380, "task_loss": 0.8099008202552795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7136578559875488, "epoch": 6.24, "learning_rate": 2.0893209354747817e-05, "loss": 0.8275, "step": 7381, "task_loss": 1.4193047285079956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7918050289154053, "epoch": 6.24, "learning_rate": 2.0888513196205506e-05, "loss": 0.8624, "step": 7382, "task_loss": 1.4969455003738403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1410895586013794, "epoch": 6.24, "learning_rate": 2.0883817037663193e-05, "loss": 0.8305, "step": 7383, "task_loss": 1.4415422677993774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4454905092716217, "epoch": 6.24, "learning_rate": 2.0879120879120882e-05, "loss": 0.6169, "step": 7384, "task_loss": 0.3242071866989136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.711545467376709, "epoch": 6.24, "learning_rate": 2.087442472057857e-05, "loss": 0.7018, "step": 7385, "task_loss": 0.5408845543861389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5921383500099182, "epoch": 6.24, "learning_rate": 2.0869728562036255e-05, "loss": 0.5112, "step": 7386, "task_loss": 0.3275664746761322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6860434412956238, "epoch": 6.24, "learning_rate": 2.086503240349394e-05, "loss": 0.5778, "step": 7387, "task_loss": 1.0752062797546387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6848148107528687, "epoch": 6.24, "learning_rate": 2.086033624495163e-05, "loss": 0.7399, "step": 7388, "task_loss": 0.38457104563713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9039562940597534, "epoch": 6.25, "learning_rate": 2.0855640086409317e-05, "loss": 0.6954, "step": 7389, "task_loss": 0.8517150282859802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8360193967819214, "epoch": 6.25, "learning_rate": 2.0850943927867007e-05, "loss": 0.8241, "step": 7390, "task_loss": 0.44797560572624207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6660614609718323, "epoch": 6.25, "learning_rate": 2.0846247769324694e-05, "loss": 0.7577, "step": 7391, "task_loss": 0.7801387310028076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7820793390274048, "epoch": 6.25, "learning_rate": 2.084155161078238e-05, "loss": 0.7943, "step": 7392, "task_loss": 0.7881274819374084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8924686312675476, "epoch": 6.25, "learning_rate": 2.083685545224007e-05, "loss": 0.6814, "step": 7393, "task_loss": 1.0851572751998901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.782543420791626, "epoch": 6.25, "learning_rate": 2.0832159293697756e-05, "loss": 0.6401, "step": 7394, "task_loss": 0.801999568939209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3347302973270416, "epoch": 6.25, "learning_rate": 2.0827463135155446e-05, "loss": 0.5055, "step": 7395, "task_loss": 0.25452521443367004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36915773153305054, "epoch": 6.25, "learning_rate": 2.0822766976613132e-05, "loss": 0.5236, "step": 7396, "task_loss": 0.33541232347488403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0754159688949585, "epoch": 6.25, "learning_rate": 2.0818070818070822e-05, "loss": 0.7501, "step": 7397, "task_loss": 1.2774888277053833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4966372549533844, "epoch": 6.25, "learning_rate": 2.0813374659528505e-05, "loss": 0.8088, "step": 7398, "task_loss": 0.46942123770713806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7370259761810303, "epoch": 6.25, "learning_rate": 2.0808678500986194e-05, "loss": 0.8851, "step": 7399, "task_loss": 1.8003469705581665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6231669187545776, "epoch": 6.26, "learning_rate": 2.080398234244388e-05, "loss": 0.7086, "step": 7400, "task_loss": 0.4499087929725647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6560161113739014, "epoch": 6.26, "learning_rate": 2.079928618390157e-05, "loss": 0.675, "step": 7401, "task_loss": 0.5352885723114014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.970083475112915, "epoch": 6.26, "learning_rate": 2.0794590025359257e-05, "loss": 0.8866, "step": 7402, "task_loss": 0.97894287109375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8478543162345886, "epoch": 6.26, "learning_rate": 2.0789893866816947e-05, "loss": 0.7535, "step": 7403, "task_loss": 0.5140755772590637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8636865615844727, "epoch": 6.26, "learning_rate": 2.0785197708274633e-05, "loss": 0.6779, "step": 7404, "task_loss": 0.5109339356422424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7495266199111938, "epoch": 6.26, "learning_rate": 2.078050154973232e-05, "loss": 0.7101, "step": 7405, "task_loss": 0.47624218463897705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.263769268989563, "epoch": 6.26, "learning_rate": 2.0775805391190006e-05, "loss": 0.8266, "step": 7406, "task_loss": 0.5004498958587646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7714345455169678, "epoch": 6.26, "learning_rate": 2.0771109232647695e-05, "loss": 0.6225, "step": 7407, "task_loss": 1.243859052658081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6592965126037598, "epoch": 6.26, "learning_rate": 2.0766413074105385e-05, "loss": 0.7783, "step": 7408, "task_loss": 1.0104185342788696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6955978870391846, "epoch": 6.26, "learning_rate": 2.076171691556307e-05, "loss": 0.718, "step": 7409, "task_loss": 0.7346447706222534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5698591470718384, "epoch": 6.26, "learning_rate": 2.0757020757020758e-05, "loss": 0.822, "step": 7410, "task_loss": 0.8182480931282043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0254590511322021, "epoch": 6.26, "learning_rate": 2.0752324598478444e-05, "loss": 0.6849, "step": 7411, "task_loss": 1.1981358528137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5353381037712097, "epoch": 6.27, "learning_rate": 2.0747628439936134e-05, "loss": 0.7479, "step": 7412, "task_loss": 0.7628188729286194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9288679361343384, "epoch": 6.27, "learning_rate": 2.074293228139382e-05, "loss": 0.9155, "step": 7413, "task_loss": 1.5032144784927368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6620155572891235, "epoch": 6.27, "learning_rate": 2.073823612285151e-05, "loss": 0.69, "step": 7414, "task_loss": 1.3134621381759644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.647847056388855, "epoch": 6.27, "learning_rate": 2.0733539964309196e-05, "loss": 0.7182, "step": 7415, "task_loss": 0.7076156139373779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8471112847328186, "epoch": 6.27, "learning_rate": 2.0728843805766886e-05, "loss": 0.9014, "step": 7416, "task_loss": 2.1045477390289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45174112915992737, "epoch": 6.27, "learning_rate": 2.072414764722457e-05, "loss": 0.6048, "step": 7417, "task_loss": 0.49247604608535767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9434651732444763, "epoch": 6.27, "learning_rate": 2.071945148868226e-05, "loss": 0.7103, "step": 7418, "task_loss": 0.7788772583007812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7873760461807251, "epoch": 6.27, "learning_rate": 2.0714755330139945e-05, "loss": 0.7307, "step": 7419, "task_loss": 0.4472924768924713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8437372446060181, "epoch": 6.27, "learning_rate": 2.0710059171597635e-05, "loss": 0.7356, "step": 7420, "task_loss": 1.0896718502044678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7178454399108887, "epoch": 6.27, "learning_rate": 2.070536301305532e-05, "loss": 0.6652, "step": 7421, "task_loss": 1.0114970207214355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9972245693206787, "epoch": 6.27, "learning_rate": 2.070066685451301e-05, "loss": 0.8863, "step": 7422, "task_loss": 0.5660697817802429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5490670204162598, "epoch": 6.27, "learning_rate": 2.0695970695970697e-05, "loss": 0.7394, "step": 7423, "task_loss": 0.33690470457077026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6658915281295776, "epoch": 6.28, "learning_rate": 2.0691274537428383e-05, "loss": 0.7109, "step": 7424, "task_loss": 0.1651746779680252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6436148285865784, "epoch": 6.28, "learning_rate": 2.0686578378886073e-05, "loss": 0.7608, "step": 7425, "task_loss": 0.561107337474823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.639808177947998, "epoch": 6.28, "learning_rate": 2.068188222034376e-05, "loss": 0.7102, "step": 7426, "task_loss": 1.1211957931518555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5841596722602844, "epoch": 6.28, "learning_rate": 2.067718606180145e-05, "loss": 0.7198, "step": 7427, "task_loss": 0.4374573528766632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4459114670753479, "epoch": 6.28, "learning_rate": 2.0672489903259136e-05, "loss": 0.5898, "step": 7428, "task_loss": 0.2844398617744446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.625358521938324, "epoch": 6.28, "learning_rate": 2.0667793744716822e-05, "loss": 0.6268, "step": 7429, "task_loss": 0.403626412153244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6517923474311829, "epoch": 6.28, "learning_rate": 2.0663097586174508e-05, "loss": 0.6439, "step": 7430, "task_loss": 0.25348395109176636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8213199973106384, "epoch": 6.28, "learning_rate": 2.0658401427632198e-05, "loss": 0.7712, "step": 7431, "task_loss": 0.9781726598739624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5491828918457031, "epoch": 6.28, "learning_rate": 2.0653705269089884e-05, "loss": 0.4837, "step": 7432, "task_loss": 0.8147094249725342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4717557430267334, "epoch": 6.28, "learning_rate": 2.0649009110547574e-05, "loss": 0.7269, "step": 7433, "task_loss": 0.4121875762939453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.58078533411026, "epoch": 6.28, "learning_rate": 2.064431295200526e-05, "loss": 0.7772, "step": 7434, "task_loss": 0.5593920946121216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5176082849502563, "epoch": 6.28, "learning_rate": 2.063961679346295e-05, "loss": 0.7695, "step": 7435, "task_loss": 0.21882949769496918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5749944448471069, "epoch": 6.29, "learning_rate": 2.0634920634920636e-05, "loss": 0.6332, "step": 7436, "task_loss": 0.28883999586105347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8733267784118652, "epoch": 6.29, "learning_rate": 2.0630224476378323e-05, "loss": 0.6528, "step": 7437, "task_loss": 0.38436993956565857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9379607439041138, "epoch": 6.29, "learning_rate": 2.062552831783601e-05, "loss": 1.0223, "step": 7438, "task_loss": 0.9450372457504272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5678895711898804, "epoch": 6.29, "learning_rate": 2.06208321592937e-05, "loss": 0.6074, "step": 7439, "task_loss": 0.7237013578414917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.995269775390625, "epoch": 6.29, "learning_rate": 2.061613600075139e-05, "loss": 0.6616, "step": 7440, "task_loss": 2.4173288345336914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5767439007759094, "epoch": 6.29, "learning_rate": 2.0611439842209075e-05, "loss": 0.8877, "step": 7441, "task_loss": 0.9566618800163269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45760712027549744, "epoch": 6.29, "learning_rate": 2.060674368366676e-05, "loss": 0.8162, "step": 7442, "task_loss": 0.9467642903327942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7120965719223022, "epoch": 6.29, "learning_rate": 2.0602047525124448e-05, "loss": 0.6465, "step": 7443, "task_loss": 0.565067708492279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3690289258956909, "epoch": 6.29, "learning_rate": 2.0597351366582137e-05, "loss": 0.4867, "step": 7444, "task_loss": 0.3796667456626892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7559669017791748, "epoch": 6.29, "learning_rate": 2.0592655208039824e-05, "loss": 0.9914, "step": 7445, "task_loss": 0.41656193137168884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9877196550369263, "epoch": 6.29, "learning_rate": 2.0587959049497513e-05, "loss": 0.7645, "step": 7446, "task_loss": 0.8209524750709534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7247501611709595, "epoch": 6.29, "learning_rate": 2.05832628909552e-05, "loss": 0.643, "step": 7447, "task_loss": 0.5943114161491394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.577051043510437, "epoch": 6.3, "learning_rate": 2.057856673241289e-05, "loss": 0.9667, "step": 7448, "task_loss": 1.5752742290496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5197546482086182, "epoch": 6.3, "learning_rate": 2.0573870573870572e-05, "loss": 0.6475, "step": 7449, "task_loss": 0.8190158009529114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7175495028495789, "epoch": 6.3, "learning_rate": 2.0569174415328262e-05, "loss": 0.8633, "step": 7450, "task_loss": 1.209347128868103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5831199884414673, "epoch": 6.3, "learning_rate": 2.056447825678595e-05, "loss": 0.7285, "step": 7451, "task_loss": 0.6362935304641724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6112349033355713, "epoch": 6.3, "learning_rate": 2.0559782098243638e-05, "loss": 0.6739, "step": 7452, "task_loss": 0.08905143290758133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7734523415565491, "epoch": 6.3, "learning_rate": 2.0555085939701324e-05, "loss": 0.7364, "step": 7453, "task_loss": 0.9012682437896729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6790093183517456, "epoch": 6.3, "learning_rate": 2.0550389781159014e-05, "loss": 0.5586, "step": 7454, "task_loss": 0.601475179195404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0332776308059692, "epoch": 6.3, "learning_rate": 2.05456936226167e-05, "loss": 0.9087, "step": 7455, "task_loss": 2.1095285415649414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6096278429031372, "epoch": 6.3, "learning_rate": 2.0540997464074387e-05, "loss": 0.7598, "step": 7456, "task_loss": 0.5822157859802246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5323781967163086, "epoch": 6.3, "learning_rate": 2.0536301305532077e-05, "loss": 0.5418, "step": 7457, "task_loss": 1.309901475906372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7867894172668457, "epoch": 6.3, "learning_rate": 2.0531605146989763e-05, "loss": 0.6514, "step": 7458, "task_loss": 0.7726338505744934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7013118863105774, "epoch": 6.3, "learning_rate": 2.0526908988447453e-05, "loss": 0.6923, "step": 7459, "task_loss": 1.0420209169387817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9644782543182373, "epoch": 6.31, "learning_rate": 2.052221282990514e-05, "loss": 0.8714, "step": 7460, "task_loss": 0.9579098224639893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.830389678478241, "epoch": 6.31, "learning_rate": 2.0517516671362825e-05, "loss": 0.6839, "step": 7461, "task_loss": 0.47992780804634094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7314928770065308, "epoch": 6.31, "learning_rate": 2.0512820512820512e-05, "loss": 0.6027, "step": 7462, "task_loss": 0.5683212280273438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5341675877571106, "epoch": 6.31, "learning_rate": 2.05081243542782e-05, "loss": 0.8847, "step": 7463, "task_loss": 0.2897273600101471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6007897853851318, "epoch": 6.31, "learning_rate": 2.0503428195735888e-05, "loss": 0.6289, "step": 7464, "task_loss": 0.656609296798706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.657687783241272, "epoch": 6.31, "learning_rate": 2.0498732037193578e-05, "loss": 0.7761, "step": 7465, "task_loss": 0.36363911628723145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6782404184341431, "epoch": 6.31, "learning_rate": 2.0494035878651264e-05, "loss": 0.5656, "step": 7466, "task_loss": 0.48114439845085144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8028154373168945, "epoch": 6.31, "learning_rate": 2.0489339720108954e-05, "loss": 0.9256, "step": 7467, "task_loss": 0.7033087611198425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0191971063613892, "epoch": 6.31, "learning_rate": 2.0484643561566637e-05, "loss": 0.8434, "step": 7468, "task_loss": 1.560568928718567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6387439370155334, "epoch": 6.31, "learning_rate": 2.0479947403024326e-05, "loss": 0.8646, "step": 7469, "task_loss": 0.9759453535079956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7657668590545654, "epoch": 6.31, "learning_rate": 2.0475251244482016e-05, "loss": 0.87, "step": 7470, "task_loss": 0.7411835789680481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.594607949256897, "epoch": 6.32, "learning_rate": 2.0470555085939702e-05, "loss": 0.7689, "step": 7471, "task_loss": 1.0351815223693848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8708657622337341, "epoch": 6.32, "learning_rate": 2.0465858927397392e-05, "loss": 0.8518, "step": 7472, "task_loss": 0.953616201877594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5274431705474854, "epoch": 6.32, "learning_rate": 2.046116276885508e-05, "loss": 0.8056, "step": 7473, "task_loss": 1.243177056312561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.076969861984253, "epoch": 6.32, "learning_rate": 2.0456466610312765e-05, "loss": 0.8989, "step": 7474, "task_loss": 1.460528016090393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.007758378982544, "epoch": 6.32, "learning_rate": 2.045177045177045e-05, "loss": 0.9413, "step": 7475, "task_loss": 0.9324761629104614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6358864307403564, "epoch": 6.32, "learning_rate": 2.044707429322814e-05, "loss": 0.6536, "step": 7476, "task_loss": 0.9914551973342896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3383448123931885, "epoch": 6.32, "learning_rate": 2.0442378134685827e-05, "loss": 0.4839, "step": 7477, "task_loss": 0.6253867745399475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7082047462463379, "epoch": 6.32, "learning_rate": 2.0437681976143517e-05, "loss": 0.5585, "step": 7478, "task_loss": 0.5273516178131104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2830662727355957, "epoch": 6.32, "learning_rate": 2.0432985817601203e-05, "loss": 0.7628, "step": 7479, "task_loss": 1.4305071830749512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8089059591293335, "epoch": 6.32, "learning_rate": 2.0428289659058893e-05, "loss": 0.8319, "step": 7480, "task_loss": 1.2935988903045654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7173799872398376, "epoch": 6.32, "learning_rate": 2.0423593500516576e-05, "loss": 0.6673, "step": 7481, "task_loss": 0.5367063283920288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6817440390586853, "epoch": 6.32, "learning_rate": 2.0418897341974266e-05, "loss": 0.7814, "step": 7482, "task_loss": 0.470753937959671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6093273162841797, "epoch": 6.33, "learning_rate": 2.0414201183431952e-05, "loss": 0.5034, "step": 7483, "task_loss": 0.5747882723808289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8942415118217468, "epoch": 6.33, "learning_rate": 2.040950502488964e-05, "loss": 0.8817, "step": 7484, "task_loss": 0.9002119302749634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9038556814193726, "epoch": 6.33, "learning_rate": 2.040480886634733e-05, "loss": 0.6159, "step": 7485, "task_loss": 0.4644799530506134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3986700475215912, "epoch": 6.33, "learning_rate": 2.0400112707805018e-05, "loss": 0.614, "step": 7486, "task_loss": 0.8114076852798462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.615193784236908, "epoch": 6.33, "learning_rate": 2.0395416549262704e-05, "loss": 0.7084, "step": 7487, "task_loss": 1.118685245513916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6005839705467224, "epoch": 6.33, "learning_rate": 2.039072039072039e-05, "loss": 0.6433, "step": 7488, "task_loss": 0.23159384727478027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29610350728034973, "epoch": 6.33, "learning_rate": 2.038602423217808e-05, "loss": 0.4865, "step": 7489, "task_loss": 0.09304363280534744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8327248096466064, "epoch": 6.33, "learning_rate": 2.0381328073635766e-05, "loss": 0.7264, "step": 7490, "task_loss": 0.39820367097854614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.745837390422821, "epoch": 6.33, "learning_rate": 2.0376631915093456e-05, "loss": 0.7703, "step": 7491, "task_loss": 0.5324692130088806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5647717118263245, "epoch": 6.33, "learning_rate": 2.0371935756551143e-05, "loss": 0.6824, "step": 7492, "task_loss": 0.2749108076095581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8552236557006836, "epoch": 6.33, "learning_rate": 2.036723959800883e-05, "loss": 0.9212, "step": 7493, "task_loss": 1.0652554035186768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28269845247268677, "epoch": 6.33, "learning_rate": 2.0362543439466515e-05, "loss": 0.6155, "step": 7494, "task_loss": 0.8914753794670105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7073753476142883, "epoch": 6.34, "learning_rate": 2.0357847280924205e-05, "loss": 0.8298, "step": 7495, "task_loss": 1.0167415142059326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.553243100643158, "epoch": 6.34, "learning_rate": 2.035315112238189e-05, "loss": 0.6375, "step": 7496, "task_loss": 0.44299423694610596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7693206071853638, "epoch": 6.34, "learning_rate": 2.034845496383958e-05, "loss": 0.659, "step": 7497, "task_loss": 0.7921037673950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46855103969573975, "epoch": 6.34, "learning_rate": 2.0343758805297267e-05, "loss": 0.7934, "step": 7498, "task_loss": 0.5274487733840942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9306718707084656, "epoch": 6.34, "learning_rate": 2.0339062646754957e-05, "loss": 0.8055, "step": 7499, "task_loss": 0.9237930774688721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9580797553062439, "epoch": 6.34, "learning_rate": 2.033436648821264e-05, "loss": 0.9033, "step": 7500, "task_loss": 1.1360944509506226 }, { "epoch": 6.34, "eval_accuracy": 0.8900990099009901, "eval_loss": 0.46387961506843567, "eval_runtime": 224.1421, "eval_samples_per_second": 112.652, "eval_steps_per_second": 0.883, "step": 7500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6015224456787109, "epoch": 6.34, "learning_rate": 2.032967032967033e-05, "loss": 0.621, "step": 7501, "task_loss": 1.3370813131332397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7571489214897156, "epoch": 6.34, "learning_rate": 2.032497417112802e-05, "loss": 0.8201, "step": 7502, "task_loss": 0.21561077237129211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6696159839630127, "epoch": 6.34, "learning_rate": 2.0320278012585706e-05, "loss": 0.5725, "step": 7503, "task_loss": 0.5763849020004272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6592567563056946, "epoch": 6.34, "learning_rate": 2.0315581854043396e-05, "loss": 0.7906, "step": 7504, "task_loss": 0.5422599911689758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6221115589141846, "epoch": 6.34, "learning_rate": 2.0310885695501082e-05, "loss": 0.6758, "step": 7505, "task_loss": 0.5758947134017944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9166711568832397, "epoch": 6.34, "learning_rate": 2.0306189536958768e-05, "loss": 0.7145, "step": 7506, "task_loss": 1.0751111507415771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5272126197814941, "epoch": 6.35, "learning_rate": 2.0301493378416455e-05, "loss": 0.6651, "step": 7507, "task_loss": 0.36240291595458984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7971782684326172, "epoch": 6.35, "learning_rate": 2.0296797219874144e-05, "loss": 0.6775, "step": 7508, "task_loss": 0.12950320541858673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5930710434913635, "epoch": 6.35, "learning_rate": 2.029210106133183e-05, "loss": 0.611, "step": 7509, "task_loss": 1.3972631692886353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0156856775283813, "epoch": 6.35, "learning_rate": 2.028740490278952e-05, "loss": 0.7312, "step": 7510, "task_loss": 0.763160765171051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3862418532371521, "epoch": 6.35, "learning_rate": 2.0282708744247207e-05, "loss": 0.6469, "step": 7511, "task_loss": 0.6058636903762817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5594426393508911, "epoch": 6.35, "learning_rate": 2.0278012585704893e-05, "loss": 0.4972, "step": 7512, "task_loss": 0.8961949348449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5818049907684326, "epoch": 6.35, "learning_rate": 2.027331642716258e-05, "loss": 0.7407, "step": 7513, "task_loss": 0.4986461102962494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7506883144378662, "epoch": 6.35, "learning_rate": 2.026862026862027e-05, "loss": 0.7483, "step": 7514, "task_loss": 0.5851574540138245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4200134575366974, "epoch": 6.35, "learning_rate": 2.0263924110077955e-05, "loss": 0.4895, "step": 7515, "task_loss": 0.7297118902206421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0649962425231934, "epoch": 6.35, "learning_rate": 2.0259227951535645e-05, "loss": 0.7336, "step": 7516, "task_loss": 0.7565789222717285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7488821744918823, "epoch": 6.35, "learning_rate": 2.0254531792993335e-05, "loss": 0.6257, "step": 7517, "task_loss": 0.08181533962488174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46530887484550476, "epoch": 6.35, "learning_rate": 2.024983563445102e-05, "loss": 0.6039, "step": 7518, "task_loss": 0.5959436297416687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8725045919418335, "epoch": 6.36, "learning_rate": 2.0245139475908708e-05, "loss": 0.6808, "step": 7519, "task_loss": 0.4625833034515381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41218000650405884, "epoch": 6.36, "learning_rate": 2.0240443317366394e-05, "loss": 0.8614, "step": 7520, "task_loss": 0.4505777359008789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8770926594734192, "epoch": 6.36, "learning_rate": 2.0235747158824084e-05, "loss": 0.7532, "step": 7521, "task_loss": 1.1020960807800293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7384219169616699, "epoch": 6.36, "learning_rate": 2.023105100028177e-05, "loss": 0.7743, "step": 7522, "task_loss": 1.9696482419967651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.529212474822998, "epoch": 6.36, "learning_rate": 2.022635484173946e-05, "loss": 0.7003, "step": 7523, "task_loss": 0.1800096035003662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8090583086013794, "epoch": 6.36, "learning_rate": 2.0221658683197146e-05, "loss": 0.7884, "step": 7524, "task_loss": 1.4432851076126099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5726295709609985, "epoch": 6.36, "learning_rate": 2.0216962524654832e-05, "loss": 0.8584, "step": 7525, "task_loss": 0.8945629596710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.571685791015625, "epoch": 6.36, "learning_rate": 2.021226636611252e-05, "loss": 0.7218, "step": 7526, "task_loss": 0.7889978885650635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5667617917060852, "epoch": 6.36, "learning_rate": 2.020757020757021e-05, "loss": 0.6437, "step": 7527, "task_loss": 0.29191315174102783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8311644196510315, "epoch": 6.36, "learning_rate": 2.0202874049027895e-05, "loss": 0.7167, "step": 7528, "task_loss": 0.5383474230766296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1373469829559326, "epoch": 6.36, "learning_rate": 2.0198177890485585e-05, "loss": 0.8188, "step": 7529, "task_loss": 1.556073784828186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9268093705177307, "epoch": 6.36, "learning_rate": 2.019348173194327e-05, "loss": 0.7467, "step": 7530, "task_loss": 0.2620450258255005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5025049448013306, "epoch": 6.37, "learning_rate": 2.018878557340096e-05, "loss": 0.5286, "step": 7531, "task_loss": 0.12240614742040634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41687071323394775, "epoch": 6.37, "learning_rate": 2.0184089414858647e-05, "loss": 0.7509, "step": 7532, "task_loss": 0.5399699211120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7134346961975098, "epoch": 6.37, "learning_rate": 2.0179393256316333e-05, "loss": 0.7956, "step": 7533, "task_loss": 0.8530808687210083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6084931492805481, "epoch": 6.37, "learning_rate": 2.0174697097774023e-05, "loss": 0.5406, "step": 7534, "task_loss": 0.18914145231246948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9830994009971619, "epoch": 6.37, "learning_rate": 2.017000093923171e-05, "loss": 0.8593, "step": 7535, "task_loss": 1.2682170867919922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7192038297653198, "epoch": 6.37, "learning_rate": 2.01653047806894e-05, "loss": 0.8045, "step": 7536, "task_loss": 0.32773855328559875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5129482746124268, "epoch": 6.37, "learning_rate": 2.0160608622147085e-05, "loss": 0.6862, "step": 7537, "task_loss": 0.10734935849905014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9399733543395996, "epoch": 6.37, "learning_rate": 2.0155912463604772e-05, "loss": 0.7283, "step": 7538, "task_loss": 1.0746405124664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.989948034286499, "epoch": 6.37, "learning_rate": 2.0151216305062458e-05, "loss": 0.9355, "step": 7539, "task_loss": 1.3781249523162842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8575922846794128, "epoch": 6.37, "learning_rate": 2.0146520146520148e-05, "loss": 0.6739, "step": 7540, "task_loss": 0.5839276313781738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3544926643371582, "epoch": 6.37, "learning_rate": 2.0141823987977834e-05, "loss": 0.5892, "step": 7541, "task_loss": 0.8095149993896484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3523016571998596, "epoch": 6.38, "learning_rate": 2.0137127829435524e-05, "loss": 0.5833, "step": 7542, "task_loss": 0.0800480768084526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7330973148345947, "epoch": 6.38, "learning_rate": 2.013243167089321e-05, "loss": 0.6861, "step": 7543, "task_loss": 0.6397478580474854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.924391508102417, "epoch": 6.38, "learning_rate": 2.0127735512350897e-05, "loss": 0.8323, "step": 7544, "task_loss": 0.5493851900100708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5518218278884888, "epoch": 6.38, "learning_rate": 2.0123039353808583e-05, "loss": 0.7512, "step": 7545, "task_loss": 0.438368022441864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6601275205612183, "epoch": 6.38, "learning_rate": 2.0118343195266273e-05, "loss": 0.638, "step": 7546, "task_loss": 0.3501250147819519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47632694244384766, "epoch": 6.38, "learning_rate": 2.0113647036723962e-05, "loss": 0.6199, "step": 7547, "task_loss": 0.9881547689437866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.86155104637146, "epoch": 6.38, "learning_rate": 2.010895087818165e-05, "loss": 0.6395, "step": 7548, "task_loss": 1.0654985904693604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9333109259605408, "epoch": 6.38, "learning_rate": 2.010425471963934e-05, "loss": 0.8344, "step": 7549, "task_loss": 1.2366564273834229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6516996622085571, "epoch": 6.38, "learning_rate": 2.0099558561097025e-05, "loss": 0.5759, "step": 7550, "task_loss": 0.3249874413013458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0305602550506592, "epoch": 6.38, "learning_rate": 2.009486240255471e-05, "loss": 0.8122, "step": 7551, "task_loss": 0.9237790107727051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6171298027038574, "epoch": 6.38, "learning_rate": 2.0090166244012397e-05, "loss": 0.7599, "step": 7552, "task_loss": 0.8630193471908569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6405401229858398, "epoch": 6.38, "learning_rate": 2.0085470085470087e-05, "loss": 0.5845, "step": 7553, "task_loss": 0.562742292881012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5675578713417053, "epoch": 6.39, "learning_rate": 2.0080773926927774e-05, "loss": 0.7355, "step": 7554, "task_loss": 0.7774025797843933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5333622097969055, "epoch": 6.39, "learning_rate": 2.0076077768385463e-05, "loss": 0.7761, "step": 7555, "task_loss": 0.6836633086204529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6741619110107422, "epoch": 6.39, "learning_rate": 2.007138160984315e-05, "loss": 0.7016, "step": 7556, "task_loss": 0.8201513886451721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6026860475540161, "epoch": 6.39, "learning_rate": 2.0066685451300836e-05, "loss": 0.9025, "step": 7557, "task_loss": 1.0753093957901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9496968984603882, "epoch": 6.39, "learning_rate": 2.0061989292758522e-05, "loss": 0.8608, "step": 7558, "task_loss": 1.0126564502716064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49738821387290955, "epoch": 6.39, "learning_rate": 2.0057293134216212e-05, "loss": 0.8987, "step": 7559, "task_loss": 0.6971676349639893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.314242959022522, "epoch": 6.39, "learning_rate": 2.00525969756739e-05, "loss": 0.7806, "step": 7560, "task_loss": 1.330492615699768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.698218822479248, "epoch": 6.39, "learning_rate": 2.0047900817131588e-05, "loss": 0.7729, "step": 7561, "task_loss": 1.3742175102233887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6516304016113281, "epoch": 6.39, "learning_rate": 2.0043204658589278e-05, "loss": 0.5506, "step": 7562, "task_loss": 0.6116172075271606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5695991516113281, "epoch": 6.39, "learning_rate": 2.003850850004696e-05, "loss": 0.8759, "step": 7563, "task_loss": 0.39051172137260437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8792805075645447, "epoch": 6.39, "learning_rate": 2.003381234150465e-05, "loss": 0.7975, "step": 7564, "task_loss": 1.3124067783355713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9701693058013916, "epoch": 6.39, "learning_rate": 2.0029116182962337e-05, "loss": 0.8117, "step": 7565, "task_loss": 1.3931810855865479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9400575757026672, "epoch": 6.4, "learning_rate": 2.0024420024420027e-05, "loss": 0.9198, "step": 7566, "task_loss": 1.1256625652313232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.960407555103302, "epoch": 6.4, "learning_rate": 2.0019723865877713e-05, "loss": 0.8827, "step": 7567, "task_loss": 0.6806681752204895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0089225769042969, "epoch": 6.4, "learning_rate": 2.0015027707335403e-05, "loss": 0.7125, "step": 7568, "task_loss": 0.8413298726081848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1611669063568115, "epoch": 6.4, "learning_rate": 2.001033154879309e-05, "loss": 1.2429, "step": 7569, "task_loss": 1.017464518547058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46277108788490295, "epoch": 6.4, "learning_rate": 2.0005635390250775e-05, "loss": 0.7474, "step": 7570, "task_loss": 0.4586963653564453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9043039679527283, "epoch": 6.4, "learning_rate": 2.000093923170846e-05, "loss": 0.8246, "step": 7571, "task_loss": 0.963898777961731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4648383855819702, "epoch": 6.4, "learning_rate": 1.999624307316615e-05, "loss": 0.742, "step": 7572, "task_loss": 0.6256142258644104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.535918116569519, "epoch": 6.4, "learning_rate": 1.9991546914623838e-05, "loss": 0.6955, "step": 7573, "task_loss": 0.6858159899711609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9700036644935608, "epoch": 6.4, "learning_rate": 1.9986850756081527e-05, "loss": 0.789, "step": 7574, "task_loss": 0.9131894707679749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.118187427520752, "epoch": 6.4, "learning_rate": 1.9982154597539214e-05, "loss": 0.8254, "step": 7575, "task_loss": 1.619396686553955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5872036814689636, "epoch": 6.4, "learning_rate": 1.99774584389969e-05, "loss": 0.8007, "step": 7576, "task_loss": 0.5500854849815369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7741221785545349, "epoch": 6.4, "learning_rate": 1.9972762280454586e-05, "loss": 0.8479, "step": 7577, "task_loss": 0.5708940625190735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5743619203567505, "epoch": 6.41, "learning_rate": 1.9968066121912276e-05, "loss": 0.6268, "step": 7578, "task_loss": 0.8144269585609436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5421385765075684, "epoch": 6.41, "learning_rate": 1.9963369963369966e-05, "loss": 0.6049, "step": 7579, "task_loss": 0.591280996799469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8099977970123291, "epoch": 6.41, "learning_rate": 1.9958673804827652e-05, "loss": 0.7554, "step": 7580, "task_loss": 1.5657055377960205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4924519658088684, "epoch": 6.41, "learning_rate": 1.9953977646285342e-05, "loss": 0.5434, "step": 7581, "task_loss": 0.48774635791778564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3650176227092743, "epoch": 6.41, "learning_rate": 1.994928148774303e-05, "loss": 0.4979, "step": 7582, "task_loss": 0.5318066477775574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.781067967414856, "epoch": 6.41, "learning_rate": 1.9944585329200715e-05, "loss": 0.8223, "step": 7583, "task_loss": 0.9674649238586426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6544642448425293, "epoch": 6.41, "learning_rate": 1.99398891706584e-05, "loss": 0.6563, "step": 7584, "task_loss": 0.567378580570221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7126490473747253, "epoch": 6.41, "learning_rate": 1.993519301211609e-05, "loss": 0.6591, "step": 7585, "task_loss": 1.173796534538269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6634585857391357, "epoch": 6.41, "learning_rate": 1.9930496853573777e-05, "loss": 0.7826, "step": 7586, "task_loss": 0.8453861474990845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7112157940864563, "epoch": 6.41, "learning_rate": 1.9925800695031467e-05, "loss": 0.7488, "step": 7587, "task_loss": 1.6424909830093384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5098986625671387, "epoch": 6.41, "learning_rate": 1.9921104536489153e-05, "loss": 0.5564, "step": 7588, "task_loss": 0.9585261940956116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6131045818328857, "epoch": 6.41, "learning_rate": 1.991640837794684e-05, "loss": 0.7706, "step": 7589, "task_loss": 0.961346447467804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7709530591964722, "epoch": 6.42, "learning_rate": 1.9911712219404526e-05, "loss": 0.7138, "step": 7590, "task_loss": 1.2603553533554077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5352152585983276, "epoch": 6.42, "learning_rate": 1.9907016060862216e-05, "loss": 0.6646, "step": 7591, "task_loss": 1.1918413639068604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7155801653862, "epoch": 6.42, "learning_rate": 1.9902319902319902e-05, "loss": 0.9291, "step": 7592, "task_loss": 0.2691490948200226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5928158760070801, "epoch": 6.42, "learning_rate": 1.989762374377759e-05, "loss": 0.6651, "step": 7593, "task_loss": 0.659570574760437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8254902958869934, "epoch": 6.42, "learning_rate": 1.989292758523528e-05, "loss": 0.7533, "step": 7594, "task_loss": 1.8429588079452515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4977198541164398, "epoch": 6.42, "learning_rate": 1.9888231426692964e-05, "loss": 0.5974, "step": 7595, "task_loss": 1.4751795530319214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.78270024061203, "epoch": 6.42, "learning_rate": 1.9883535268150654e-05, "loss": 0.8539, "step": 7596, "task_loss": 0.7158774733543396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.963963508605957, "epoch": 6.42, "learning_rate": 1.987883910960834e-05, "loss": 0.8911, "step": 7597, "task_loss": 1.4106472730636597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32838934659957886, "epoch": 6.42, "learning_rate": 1.987414295106603e-05, "loss": 0.7086, "step": 7598, "task_loss": 0.409552663564682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6837931871414185, "epoch": 6.42, "learning_rate": 1.9869446792523716e-05, "loss": 0.6698, "step": 7599, "task_loss": 1.436464786529541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7694858312606812, "epoch": 6.42, "learning_rate": 1.9864750633981406e-05, "loss": 0.6659, "step": 7600, "task_loss": 1.0055369138717651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9002770185470581, "epoch": 6.42, "learning_rate": 1.9860054475439093e-05, "loss": 0.5729, "step": 7601, "task_loss": 2.0935122966766357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.501176118850708, "epoch": 6.43, "learning_rate": 1.985535831689678e-05, "loss": 0.6041, "step": 7602, "task_loss": 0.49530351161956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9207032322883606, "epoch": 6.43, "learning_rate": 1.9850662158354465e-05, "loss": 0.8471, "step": 7603, "task_loss": 0.7134621143341064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5259947776794434, "epoch": 6.43, "learning_rate": 1.9845965999812155e-05, "loss": 0.6267, "step": 7604, "task_loss": 0.8649637699127197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7972732186317444, "epoch": 6.43, "learning_rate": 1.984126984126984e-05, "loss": 0.7902, "step": 7605, "task_loss": 0.6508281230926514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.555101752281189, "epoch": 6.43, "learning_rate": 1.983657368272753e-05, "loss": 0.5839, "step": 7606, "task_loss": 0.8467839360237122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7618261575698853, "epoch": 6.43, "learning_rate": 1.9831877524185217e-05, "loss": 0.6496, "step": 7607, "task_loss": 1.4745838642120361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6315302848815918, "epoch": 6.43, "learning_rate": 1.9827181365642904e-05, "loss": 0.6001, "step": 7608, "task_loss": 0.49629005789756775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7458266615867615, "epoch": 6.43, "learning_rate": 1.9822485207100593e-05, "loss": 0.6493, "step": 7609, "task_loss": 1.5121029615402222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0862457752227783, "epoch": 6.43, "learning_rate": 1.981778904855828e-05, "loss": 0.6298, "step": 7610, "task_loss": 0.8248926997184753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7236902713775635, "epoch": 6.43, "learning_rate": 1.981309289001597e-05, "loss": 0.5707, "step": 7611, "task_loss": 1.6086007356643677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5271357893943787, "epoch": 6.43, "learning_rate": 1.9808396731473656e-05, "loss": 0.6203, "step": 7612, "task_loss": 0.8922145962715149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.648913562297821, "epoch": 6.44, "learning_rate": 1.9803700572931346e-05, "loss": 0.601, "step": 7613, "task_loss": 0.489500492811203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0278942584991455, "epoch": 6.44, "learning_rate": 1.9799004414389032e-05, "loss": 0.9064, "step": 7614, "task_loss": 1.0742533206939697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7253291606903076, "epoch": 6.44, "learning_rate": 1.9794308255846718e-05, "loss": 0.8762, "step": 7615, "task_loss": 0.7807506918907166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6017560958862305, "epoch": 6.44, "learning_rate": 1.9789612097304405e-05, "loss": 0.7727, "step": 7616, "task_loss": 1.2136317491531372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5531520247459412, "epoch": 6.44, "learning_rate": 1.9784915938762094e-05, "loss": 0.7134, "step": 7617, "task_loss": 0.7991658449172974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6275091171264648, "epoch": 6.44, "learning_rate": 1.978021978021978e-05, "loss": 0.7399, "step": 7618, "task_loss": 0.9509278535842896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.68482506275177, "epoch": 6.44, "learning_rate": 1.977552362167747e-05, "loss": 0.6926, "step": 7619, "task_loss": 1.0630697011947632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7343887686729431, "epoch": 6.44, "learning_rate": 1.9770827463135157e-05, "loss": 0.7095, "step": 7620, "task_loss": 1.308393120765686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7998436689376831, "epoch": 6.44, "learning_rate": 1.9766131304592843e-05, "loss": 0.6162, "step": 7621, "task_loss": 0.5980373024940491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5240861177444458, "epoch": 6.44, "learning_rate": 1.976143514605053e-05, "loss": 0.7426, "step": 7622, "task_loss": 1.016555666923523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4134431779384613, "epoch": 6.44, "learning_rate": 1.975673898750822e-05, "loss": 0.5896, "step": 7623, "task_loss": 1.014450192451477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5162676572799683, "epoch": 6.44, "learning_rate": 1.975204282896591e-05, "loss": 0.5638, "step": 7624, "task_loss": 0.3674808144569397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.601759135723114, "epoch": 6.45, "learning_rate": 1.9747346670423595e-05, "loss": 0.8568, "step": 7625, "task_loss": 0.6220812201499939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5700751543045044, "epoch": 6.45, "learning_rate": 1.9742650511881285e-05, "loss": 0.7039, "step": 7626, "task_loss": 0.7387523651123047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.75424724817276, "epoch": 6.45, "learning_rate": 1.9737954353338968e-05, "loss": 0.7548, "step": 7627, "task_loss": 1.0633270740509033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8591405153274536, "epoch": 6.45, "learning_rate": 1.9733258194796658e-05, "loss": 0.7837, "step": 7628, "task_loss": 1.4717028141021729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5799170732498169, "epoch": 6.45, "learning_rate": 1.9728562036254344e-05, "loss": 0.5606, "step": 7629, "task_loss": 0.47799035906791687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5793362855911255, "epoch": 6.45, "learning_rate": 1.9723865877712034e-05, "loss": 0.7691, "step": 7630, "task_loss": 0.3183720111846924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6167230606079102, "epoch": 6.45, "learning_rate": 1.971916971916972e-05, "loss": 0.6241, "step": 7631, "task_loss": 0.6344899535179138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6527011394500732, "epoch": 6.45, "learning_rate": 1.971447356062741e-05, "loss": 0.8406, "step": 7632, "task_loss": 0.810448169708252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8616532683372498, "epoch": 6.45, "learning_rate": 1.9709777402085096e-05, "loss": 0.627, "step": 7633, "task_loss": 0.26179444789886475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5772684812545776, "epoch": 6.45, "learning_rate": 1.9705081243542782e-05, "loss": 0.5817, "step": 7634, "task_loss": 0.68758225440979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8178844451904297, "epoch": 6.45, "learning_rate": 1.970038508500047e-05, "loss": 0.6956, "step": 7635, "task_loss": 0.6302015781402588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6530711650848389, "epoch": 6.45, "learning_rate": 1.969568892645816e-05, "loss": 0.5727, "step": 7636, "task_loss": 0.6656954288482666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36230525374412537, "epoch": 6.46, "learning_rate": 1.9690992767915845e-05, "loss": 0.6158, "step": 7637, "task_loss": 0.034238796681165695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7758245468139648, "epoch": 6.46, "learning_rate": 1.9686296609373535e-05, "loss": 0.7444, "step": 7638, "task_loss": 2.171314001083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9989791512489319, "epoch": 6.46, "learning_rate": 1.968160045083122e-05, "loss": 1.0636, "step": 7639, "task_loss": 0.28941887617111206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5326361656188965, "epoch": 6.46, "learning_rate": 1.9676904292288907e-05, "loss": 0.7528, "step": 7640, "task_loss": 1.0021177530288696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1371498107910156, "epoch": 6.46, "learning_rate": 1.9672208133746597e-05, "loss": 0.7863, "step": 7641, "task_loss": 1.5893747806549072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.012179970741272, "epoch": 6.46, "learning_rate": 1.9667511975204283e-05, "loss": 0.7357, "step": 7642, "task_loss": 0.6353057026863098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0637400150299072, "epoch": 6.46, "learning_rate": 1.9662815816661973e-05, "loss": 0.8773, "step": 7643, "task_loss": 0.9398890137672424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5559054613113403, "epoch": 6.46, "learning_rate": 1.965811965811966e-05, "loss": 0.5633, "step": 7644, "task_loss": 0.4773670732975006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5801807045936584, "epoch": 6.46, "learning_rate": 1.965342349957735e-05, "loss": 0.6111, "step": 7645, "task_loss": 0.7597885727882385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9052151441574097, "epoch": 6.46, "learning_rate": 1.9648727341035032e-05, "loss": 0.842, "step": 7646, "task_loss": 0.7041805982589722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8911800384521484, "epoch": 6.46, "learning_rate": 1.9644031182492722e-05, "loss": 0.6724, "step": 7647, "task_loss": 0.6667134165763855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5074751377105713, "epoch": 6.46, "learning_rate": 1.9639335023950408e-05, "loss": 0.4778, "step": 7648, "task_loss": 0.12705573439598083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7254836559295654, "epoch": 6.47, "learning_rate": 1.9634638865408098e-05, "loss": 0.6014, "step": 7649, "task_loss": 0.4454666078090668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5229495167732239, "epoch": 6.47, "learning_rate": 1.9629942706865784e-05, "loss": 0.537, "step": 7650, "task_loss": 0.3589460551738739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5509681105613708, "epoch": 6.47, "learning_rate": 1.9625246548323474e-05, "loss": 0.6831, "step": 7651, "task_loss": 0.49041417241096497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.801722526550293, "epoch": 6.47, "learning_rate": 1.962055038978116e-05, "loss": 0.7752, "step": 7652, "task_loss": 0.9099933505058289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4629775285720825, "epoch": 6.47, "learning_rate": 1.9615854231238847e-05, "loss": 0.6677, "step": 7653, "task_loss": 0.09740877896547318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7335582375526428, "epoch": 6.47, "learning_rate": 1.9611158072696533e-05, "loss": 0.8472, "step": 7654, "task_loss": 0.50173020362854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9363397359848022, "epoch": 6.47, "learning_rate": 1.9606461914154223e-05, "loss": 0.6782, "step": 7655, "task_loss": 0.9497846961021423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6062206029891968, "epoch": 6.47, "learning_rate": 1.9601765755611912e-05, "loss": 0.5717, "step": 7656, "task_loss": 1.6038775444030762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8302662968635559, "epoch": 6.47, "learning_rate": 1.95970695970696e-05, "loss": 0.8001, "step": 7657, "task_loss": 0.9078373312950134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0615894794464111, "epoch": 6.47, "learning_rate": 1.9592373438527285e-05, "loss": 0.8409, "step": 7658, "task_loss": 1.7548911571502686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.405248761177063, "epoch": 6.47, "learning_rate": 1.958767727998497e-05, "loss": 0.5163, "step": 7659, "task_loss": 0.7649040222167969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4592381417751312, "epoch": 6.47, "learning_rate": 1.958298112144266e-05, "loss": 0.6726, "step": 7660, "task_loss": 0.48105576634407043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39776989817619324, "epoch": 6.48, "learning_rate": 1.9578284962900347e-05, "loss": 0.4929, "step": 7661, "task_loss": 0.5908452868461609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.555922269821167, "epoch": 6.48, "learning_rate": 1.9573588804358037e-05, "loss": 0.5575, "step": 7662, "task_loss": 0.618859052658081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5594500303268433, "epoch": 6.48, "learning_rate": 1.9568892645815723e-05, "loss": 0.6148, "step": 7663, "task_loss": 0.29954764246940613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8110962510108948, "epoch": 6.48, "learning_rate": 1.9564196487273413e-05, "loss": 0.8713, "step": 7664, "task_loss": 1.527608871459961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.048767328262329, "epoch": 6.48, "learning_rate": 1.95595003287311e-05, "loss": 0.729, "step": 7665, "task_loss": 1.3225458860397339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.117417573928833, "epoch": 6.48, "learning_rate": 1.9554804170188786e-05, "loss": 0.7877, "step": 7666, "task_loss": 0.7836707830429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6287204623222351, "epoch": 6.48, "learning_rate": 1.9550108011646472e-05, "loss": 0.5763, "step": 7667, "task_loss": 0.3835512399673462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46199965476989746, "epoch": 6.48, "learning_rate": 1.9545411853104162e-05, "loss": 0.5965, "step": 7668, "task_loss": 0.5927996635437012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4926247298717499, "epoch": 6.48, "learning_rate": 1.954071569456185e-05, "loss": 0.5164, "step": 7669, "task_loss": 0.3627850413322449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5343976616859436, "epoch": 6.48, "learning_rate": 1.9536019536019538e-05, "loss": 0.5543, "step": 7670, "task_loss": 0.8605322241783142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7932080626487732, "epoch": 6.48, "learning_rate": 1.9531323377477224e-05, "loss": 0.8739, "step": 7671, "task_loss": 0.6112082600593567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5687568187713623, "epoch": 6.48, "learning_rate": 1.952662721893491e-05, "loss": 0.6046, "step": 7672, "task_loss": 1.0097674131393433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5058267116546631, "epoch": 6.49, "learning_rate": 1.95219310603926e-05, "loss": 0.5113, "step": 7673, "task_loss": 0.2578563988208771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8922109007835388, "epoch": 6.49, "learning_rate": 1.9517234901850287e-05, "loss": 0.8291, "step": 7674, "task_loss": 0.5455604791641235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5983232259750366, "epoch": 6.49, "learning_rate": 1.9512538743307977e-05, "loss": 0.727, "step": 7675, "task_loss": 0.624460756778717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6089338064193726, "epoch": 6.49, "learning_rate": 1.9507842584765663e-05, "loss": 0.6053, "step": 7676, "task_loss": 0.6265380382537842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6770689487457275, "epoch": 6.49, "learning_rate": 1.9503146426223353e-05, "loss": 0.7517, "step": 7677, "task_loss": 0.6195037961006165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.755669355392456, "epoch": 6.49, "learning_rate": 1.9498450267681036e-05, "loss": 0.759, "step": 7678, "task_loss": 0.6240878105163574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6775854229927063, "epoch": 6.49, "learning_rate": 1.9493754109138725e-05, "loss": 0.7755, "step": 7679, "task_loss": 0.7836315631866455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6132378578186035, "epoch": 6.49, "learning_rate": 1.948905795059641e-05, "loss": 0.7949, "step": 7680, "task_loss": 0.8598362803459167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3791685104370117, "epoch": 6.49, "learning_rate": 1.94843617920541e-05, "loss": 0.5025, "step": 7681, "task_loss": 0.3562249541282654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7083181142807007, "epoch": 6.49, "learning_rate": 1.9479665633511788e-05, "loss": 0.8869, "step": 7682, "task_loss": 1.132312297821045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33176177740097046, "epoch": 6.49, "learning_rate": 1.9474969474969477e-05, "loss": 0.6685, "step": 7683, "task_loss": 0.1421789973974228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5174624919891357, "epoch": 6.5, "learning_rate": 1.9470273316427164e-05, "loss": 0.6956, "step": 7684, "task_loss": 1.3110910654067993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7782166004180908, "epoch": 6.5, "learning_rate": 1.946557715788485e-05, "loss": 0.6927, "step": 7685, "task_loss": 1.032650351524353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5560308694839478, "epoch": 6.5, "learning_rate": 1.946088099934254e-05, "loss": 0.7004, "step": 7686, "task_loss": 0.3273417055606842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4823813736438751, "epoch": 6.5, "learning_rate": 1.9456184840800226e-05, "loss": 0.6011, "step": 7687, "task_loss": 0.8553427457809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5441175699234009, "epoch": 6.5, "learning_rate": 1.9451488682257916e-05, "loss": 0.5356, "step": 7688, "task_loss": 0.4626624286174774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4601706564426422, "epoch": 6.5, "learning_rate": 1.9446792523715602e-05, "loss": 0.5496, "step": 7689, "task_loss": 0.8929843306541443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.246556043624878, "epoch": 6.5, "learning_rate": 1.944209636517329e-05, "loss": 0.8597, "step": 7690, "task_loss": 1.6109899282455444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9716125726699829, "epoch": 6.5, "learning_rate": 1.9437400206630975e-05, "loss": 0.6233, "step": 7691, "task_loss": 1.5642586946487427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4217182397842407, "epoch": 6.5, "learning_rate": 1.9432704048088665e-05, "loss": 0.7356, "step": 7692, "task_loss": 0.18053463101387024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9148034453392029, "epoch": 6.5, "learning_rate": 1.942800788954635e-05, "loss": 0.65, "step": 7693, "task_loss": 0.9065867066383362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39484691619873047, "epoch": 6.5, "learning_rate": 1.942331173100404e-05, "loss": 0.535, "step": 7694, "task_loss": 0.3910062611103058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4072638154029846, "epoch": 6.5, "learning_rate": 1.9418615572461727e-05, "loss": 0.7227, "step": 7695, "task_loss": 0.19216139614582062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6738665103912354, "epoch": 6.51, "learning_rate": 1.9413919413919417e-05, "loss": 0.8204, "step": 7696, "task_loss": 1.1271852254867554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9318728446960449, "epoch": 6.51, "learning_rate": 1.9409223255377103e-05, "loss": 0.8211, "step": 7697, "task_loss": 1.1273378133773804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8453035950660706, "epoch": 6.51, "learning_rate": 1.940452709683479e-05, "loss": 0.9213, "step": 7698, "task_loss": 0.8953791856765747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6005862355232239, "epoch": 6.51, "learning_rate": 1.9399830938292476e-05, "loss": 0.7097, "step": 7699, "task_loss": 0.3956902027130127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5311049222946167, "epoch": 6.51, "learning_rate": 1.9395134779750165e-05, "loss": 0.4854, "step": 7700, "task_loss": 0.5844699144363403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7759793996810913, "epoch": 6.51, "learning_rate": 1.9390438621207855e-05, "loss": 0.9086, "step": 7701, "task_loss": 1.5022810697555542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7353447675704956, "epoch": 6.51, "learning_rate": 1.938574246266554e-05, "loss": 0.6588, "step": 7702, "task_loss": 1.1515512466430664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.505161702632904, "epoch": 6.51, "learning_rate": 1.9381046304123228e-05, "loss": 0.7664, "step": 7703, "task_loss": 0.5830965042114258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6094008684158325, "epoch": 6.51, "learning_rate": 1.9376350145580914e-05, "loss": 0.7476, "step": 7704, "task_loss": 0.8544144630432129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5341677665710449, "epoch": 6.51, "learning_rate": 1.9371653987038604e-05, "loss": 0.6676, "step": 7705, "task_loss": 0.3118109107017517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7634299993515015, "epoch": 6.51, "learning_rate": 1.936695782849629e-05, "loss": 0.6378, "step": 7706, "task_loss": 0.21533836424350739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5390138030052185, "epoch": 6.51, "learning_rate": 1.936226166995398e-05, "loss": 0.612, "step": 7707, "task_loss": 0.7678750157356262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6526979207992554, "epoch": 6.52, "learning_rate": 1.9357565511411666e-05, "loss": 0.8864, "step": 7708, "task_loss": 1.2967112064361572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.021026849746704, "epoch": 6.52, "learning_rate": 1.9352869352869356e-05, "loss": 0.7151, "step": 7709, "task_loss": 0.949704647064209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7584649324417114, "epoch": 6.52, "learning_rate": 1.934817319432704e-05, "loss": 0.7535, "step": 7710, "task_loss": 1.3101047277450562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8961614370346069, "epoch": 6.52, "learning_rate": 1.934347703578473e-05, "loss": 0.992, "step": 7711, "task_loss": 0.3373355567455292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.694952130317688, "epoch": 6.52, "learning_rate": 1.9338780877242415e-05, "loss": 0.6728, "step": 7712, "task_loss": 0.7840176820755005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5063399076461792, "epoch": 6.52, "learning_rate": 1.9334084718700105e-05, "loss": 0.7218, "step": 7713, "task_loss": 0.5455149412155151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9447771906852722, "epoch": 6.52, "learning_rate": 1.932938856015779e-05, "loss": 0.6089, "step": 7714, "task_loss": 0.38551315665245056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6690858006477356, "epoch": 6.52, "learning_rate": 1.932469240161548e-05, "loss": 0.7466, "step": 7715, "task_loss": 0.705573558807373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7680718302726746, "epoch": 6.52, "learning_rate": 1.9319996243073167e-05, "loss": 0.626, "step": 7716, "task_loss": 0.6752618551254272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.518124520778656, "epoch": 6.52, "learning_rate": 1.9315300084530854e-05, "loss": 0.6323, "step": 7717, "task_loss": 0.2502838969230652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5123516321182251, "epoch": 6.52, "learning_rate": 1.9310603925988543e-05, "loss": 0.7059, "step": 7718, "task_loss": 0.583441972732544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0326149463653564, "epoch": 6.52, "learning_rate": 1.930590776744623e-05, "loss": 0.8723, "step": 7719, "task_loss": 1.4306726455688477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5263512134552002, "epoch": 6.53, "learning_rate": 1.930121160890392e-05, "loss": 0.6604, "step": 7720, "task_loss": 1.038735032081604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.660688042640686, "epoch": 6.53, "learning_rate": 1.9296515450361606e-05, "loss": 0.7018, "step": 7721, "task_loss": 0.7304521799087524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8452425003051758, "epoch": 6.53, "learning_rate": 1.9291819291819292e-05, "loss": 0.8457, "step": 7722, "task_loss": 1.0710935592651367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37445998191833496, "epoch": 6.53, "learning_rate": 1.928712313327698e-05, "loss": 0.4734, "step": 7723, "task_loss": 0.1781979352235794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8441646099090576, "epoch": 6.53, "learning_rate": 1.9282426974734668e-05, "loss": 0.8511, "step": 7724, "task_loss": 0.9229071736335754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0095365047454834, "epoch": 6.53, "learning_rate": 1.9277730816192354e-05, "loss": 0.7891, "step": 7725, "task_loss": 0.9019142389297485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8880236148834229, "epoch": 6.53, "learning_rate": 1.9273034657650044e-05, "loss": 0.6387, "step": 7726, "task_loss": 0.5475541949272156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.448500394821167, "epoch": 6.53, "learning_rate": 1.926833849910773e-05, "loss": 0.7056, "step": 7727, "task_loss": 0.61821448802948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7682489156723022, "epoch": 6.53, "learning_rate": 1.926364234056542e-05, "loss": 0.6187, "step": 7728, "task_loss": 0.5185555815696716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3490172028541565, "epoch": 6.53, "learning_rate": 1.9258946182023103e-05, "loss": 0.4752, "step": 7729, "task_loss": 0.9956402778625488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9221083521842957, "epoch": 6.53, "learning_rate": 1.9254250023480793e-05, "loss": 0.7237, "step": 7730, "task_loss": 0.5689821839332581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48330754041671753, "epoch": 6.53, "learning_rate": 1.924955386493848e-05, "loss": 0.6491, "step": 7731, "task_loss": 0.5975372791290283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5969243049621582, "epoch": 6.54, "learning_rate": 1.924485770639617e-05, "loss": 0.6489, "step": 7732, "task_loss": 1.021194577217102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8132016062736511, "epoch": 6.54, "learning_rate": 1.924016154785386e-05, "loss": 0.7035, "step": 7733, "task_loss": 0.2922912538051605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6305222511291504, "epoch": 6.54, "learning_rate": 1.9235465389311545e-05, "loss": 0.5802, "step": 7734, "task_loss": 0.24204514920711517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48668110370635986, "epoch": 6.54, "learning_rate": 1.923076923076923e-05, "loss": 0.6142, "step": 7735, "task_loss": 0.544009804725647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47561660408973694, "epoch": 6.54, "learning_rate": 1.9226073072226918e-05, "loss": 0.4658, "step": 7736, "task_loss": 1.0754733085632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7399841547012329, "epoch": 6.54, "learning_rate": 1.9221376913684607e-05, "loss": 0.6586, "step": 7737, "task_loss": 0.3694024384021759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2052721977233887, "epoch": 6.54, "learning_rate": 1.9216680755142294e-05, "loss": 0.6726, "step": 7738, "task_loss": 0.6973388195037842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7379342317581177, "epoch": 6.54, "learning_rate": 1.9211984596599984e-05, "loss": 0.5774, "step": 7739, "task_loss": 0.842318058013916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7513180375099182, "epoch": 6.54, "learning_rate": 1.920728843805767e-05, "loss": 0.5889, "step": 7740, "task_loss": 0.7036629319190979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7462599277496338, "epoch": 6.54, "learning_rate": 1.9202592279515356e-05, "loss": 0.7141, "step": 7741, "task_loss": 1.192251205444336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2576596140861511, "epoch": 6.54, "learning_rate": 1.9197896120973043e-05, "loss": 0.564, "step": 7742, "task_loss": 0.17376968264579773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44842156767845154, "epoch": 6.54, "learning_rate": 1.9193199962430732e-05, "loss": 0.5997, "step": 7743, "task_loss": 0.4294542670249939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8199923038482666, "epoch": 6.55, "learning_rate": 1.918850380388842e-05, "loss": 0.6085, "step": 7744, "task_loss": 0.4230400323867798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4166744351387024, "epoch": 6.55, "learning_rate": 1.918380764534611e-05, "loss": 0.7884, "step": 7745, "task_loss": 0.33389294147491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9786747097969055, "epoch": 6.55, "learning_rate": 1.9179111486803795e-05, "loss": 1.0279, "step": 7746, "task_loss": 0.7391691207885742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1831110715866089, "epoch": 6.55, "learning_rate": 1.9174415328261484e-05, "loss": 0.8767, "step": 7747, "task_loss": 1.1013160943984985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5818350315093994, "epoch": 6.55, "learning_rate": 1.916971916971917e-05, "loss": 0.8973, "step": 7748, "task_loss": 1.5658382177352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8816795349121094, "epoch": 6.55, "learning_rate": 1.9165023011176857e-05, "loss": 0.8025, "step": 7749, "task_loss": 0.5640991926193237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5066423416137695, "epoch": 6.55, "learning_rate": 1.9160326852634547e-05, "loss": 0.6405, "step": 7750, "task_loss": 0.9874940514564514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6522372961044312, "epoch": 6.55, "learning_rate": 1.9155630694092233e-05, "loss": 0.57, "step": 7751, "task_loss": 0.7858275175094604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7292498350143433, "epoch": 6.55, "learning_rate": 1.9150934535549923e-05, "loss": 0.7435, "step": 7752, "task_loss": 0.9505571722984314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6987418532371521, "epoch": 6.55, "learning_rate": 1.914623837700761e-05, "loss": 0.8434, "step": 7753, "task_loss": 1.1642183065414429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.725627064704895, "epoch": 6.55, "learning_rate": 1.9141542218465296e-05, "loss": 0.6585, "step": 7754, "task_loss": 0.7421123385429382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.024196743965149, "epoch": 6.56, "learning_rate": 1.9136846059922982e-05, "loss": 0.8153, "step": 7755, "task_loss": 0.8298729658126831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6325453519821167, "epoch": 6.56, "learning_rate": 1.913214990138067e-05, "loss": 0.771, "step": 7756, "task_loss": 1.211829662322998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4025532007217407, "epoch": 6.56, "learning_rate": 1.9127453742838358e-05, "loss": 0.7802, "step": 7757, "task_loss": 0.8089741468429565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5815826058387756, "epoch": 6.56, "learning_rate": 1.9122757584296048e-05, "loss": 0.7742, "step": 7758, "task_loss": 0.8771381378173828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5150001049041748, "epoch": 6.56, "learning_rate": 1.9118061425753734e-05, "loss": 0.5831, "step": 7759, "task_loss": 0.11793205142021179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47928598523139954, "epoch": 6.56, "learning_rate": 1.9113365267211424e-05, "loss": 0.7505, "step": 7760, "task_loss": 0.6951413154602051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6092385053634644, "epoch": 6.56, "learning_rate": 1.9108669108669107e-05, "loss": 0.8158, "step": 7761, "task_loss": 1.0236263275146484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5398557186126709, "epoch": 6.56, "learning_rate": 1.9103972950126796e-05, "loss": 0.5928, "step": 7762, "task_loss": 0.3768951892852783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9932535886764526, "epoch": 6.56, "learning_rate": 1.9099276791584486e-05, "loss": 0.8049, "step": 7763, "task_loss": 1.4403581619262695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4322192072868347, "epoch": 6.56, "learning_rate": 1.9094580633042173e-05, "loss": 0.5641, "step": 7764, "task_loss": 0.6881524920463562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.586004376411438, "epoch": 6.56, "learning_rate": 1.9089884474499862e-05, "loss": 0.8951, "step": 7765, "task_loss": 1.2353109121322632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4155287742614746, "epoch": 6.56, "learning_rate": 1.908518831595755e-05, "loss": 0.6874, "step": 7766, "task_loss": 0.3136499524116516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24655117094516754, "epoch": 6.57, "learning_rate": 1.9080492157415235e-05, "loss": 0.6164, "step": 7767, "task_loss": 0.26909416913986206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32035544514656067, "epoch": 6.57, "learning_rate": 1.907579599887292e-05, "loss": 0.621, "step": 7768, "task_loss": 0.14241869747638702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40150219202041626, "epoch": 6.57, "learning_rate": 1.907109984033061e-05, "loss": 0.4916, "step": 7769, "task_loss": 0.026581592857837677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8084406852722168, "epoch": 6.57, "learning_rate": 1.9066403681788297e-05, "loss": 0.7653, "step": 7770, "task_loss": 0.5166022777557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9504649639129639, "epoch": 6.57, "learning_rate": 1.9061707523245987e-05, "loss": 0.882, "step": 7771, "task_loss": 1.264012098312378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45429086685180664, "epoch": 6.57, "learning_rate": 1.9057011364703673e-05, "loss": 0.5527, "step": 7772, "task_loss": 0.7661442160606384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.6153054237365723, "epoch": 6.57, "learning_rate": 1.905231520616136e-05, "loss": 0.8864, "step": 7773, "task_loss": 1.5726394653320312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4240601062774658, "epoch": 6.57, "learning_rate": 1.9047619047619046e-05, "loss": 0.7244, "step": 7774, "task_loss": 0.6481456160545349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7499233484268188, "epoch": 6.57, "learning_rate": 1.9042922889076736e-05, "loss": 0.6163, "step": 7775, "task_loss": 1.3778314590454102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5426812171936035, "epoch": 6.57, "learning_rate": 1.9038226730534422e-05, "loss": 0.6203, "step": 7776, "task_loss": 0.3736880123615265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7225120067596436, "epoch": 6.57, "learning_rate": 1.9033530571992112e-05, "loss": 0.7105, "step": 7777, "task_loss": 0.9015071392059326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5645957589149475, "epoch": 6.57, "learning_rate": 1.90288344134498e-05, "loss": 0.6147, "step": 7778, "task_loss": 2.17633056640625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4473418593406677, "epoch": 6.58, "learning_rate": 1.9024138254907488e-05, "loss": 0.6893, "step": 7779, "task_loss": 0.5662444829940796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6052767038345337, "epoch": 6.58, "learning_rate": 1.9019442096365174e-05, "loss": 0.6129, "step": 7780, "task_loss": 0.5424375534057617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7742874026298523, "epoch": 6.58, "learning_rate": 1.901474593782286e-05, "loss": 0.7066, "step": 7781, "task_loss": 0.5439494848251343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4124906063079834, "epoch": 6.58, "learning_rate": 1.901004977928055e-05, "loss": 0.7528, "step": 7782, "task_loss": 0.2904655933380127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8627408742904663, "epoch": 6.58, "learning_rate": 1.9005353620738237e-05, "loss": 0.5664, "step": 7783, "task_loss": 1.0516464710235596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0799133777618408, "epoch": 6.58, "learning_rate": 1.9000657462195926e-05, "loss": 0.9497, "step": 7784, "task_loss": 1.2244842052459717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7119284868240356, "epoch": 6.58, "learning_rate": 1.8995961303653613e-05, "loss": 0.492, "step": 7785, "task_loss": 0.3710053265094757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4329224228858948, "epoch": 6.58, "learning_rate": 1.89912651451113e-05, "loss": 0.7071, "step": 7786, "task_loss": 0.29786747694015503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.089606523513794, "epoch": 6.58, "learning_rate": 1.8986568986568985e-05, "loss": 0.7881, "step": 7787, "task_loss": 1.7864620685577393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.730303943157196, "epoch": 6.58, "learning_rate": 1.8981872828026675e-05, "loss": 0.7081, "step": 7788, "task_loss": 1.2710926532745361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6397780179977417, "epoch": 6.58, "learning_rate": 1.897717666948436e-05, "loss": 0.53, "step": 7789, "task_loss": 0.7480977177619934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7706957459449768, "epoch": 6.58, "learning_rate": 1.897248051094205e-05, "loss": 0.7404, "step": 7790, "task_loss": 0.5489586591720581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5748846530914307, "epoch": 6.59, "learning_rate": 1.8967784352399738e-05, "loss": 0.6384, "step": 7791, "task_loss": 0.9752752184867859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.4009783267974854, "epoch": 6.59, "learning_rate": 1.8963088193857424e-05, "loss": 0.8965, "step": 7792, "task_loss": 1.208162546157837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.093932032585144, "epoch": 6.59, "learning_rate": 1.895839203531511e-05, "loss": 0.8001, "step": 7793, "task_loss": 0.6970321536064148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8212417364120483, "epoch": 6.59, "learning_rate": 1.89536958767728e-05, "loss": 0.7331, "step": 7794, "task_loss": 0.5222839117050171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4880121946334839, "epoch": 6.59, "learning_rate": 1.894899971823049e-05, "loss": 0.6937, "step": 7795, "task_loss": 0.46124598383903503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7117540240287781, "epoch": 6.59, "learning_rate": 1.8944303559688176e-05, "loss": 0.7014, "step": 7796, "task_loss": 0.8390952348709106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5428898930549622, "epoch": 6.59, "learning_rate": 1.8939607401145866e-05, "loss": 0.5328, "step": 7797, "task_loss": 0.44260475039482117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4220261573791504, "epoch": 6.59, "learning_rate": 1.8934911242603552e-05, "loss": 0.5497, "step": 7798, "task_loss": 0.45478734374046326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38427209854125977, "epoch": 6.59, "learning_rate": 1.893021508406124e-05, "loss": 0.6655, "step": 7799, "task_loss": 0.5810513496398926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5509493947029114, "epoch": 6.59, "learning_rate": 1.8925518925518925e-05, "loss": 0.5991, "step": 7800, "task_loss": 0.2551139295101166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7189676761627197, "epoch": 6.59, "learning_rate": 1.8920822766976615e-05, "loss": 0.6607, "step": 7801, "task_loss": 0.7974716424942017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6117703914642334, "epoch": 6.59, "learning_rate": 1.89161266084343e-05, "loss": 0.8141, "step": 7802, "task_loss": 1.3918877840042114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6720221638679504, "epoch": 6.6, "learning_rate": 1.891143044989199e-05, "loss": 0.6829, "step": 7803, "task_loss": 1.3901865482330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6849770545959473, "epoch": 6.6, "learning_rate": 1.8906734291349677e-05, "loss": 0.748, "step": 7804, "task_loss": 0.7102108597755432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0275851488113403, "epoch": 6.6, "learning_rate": 1.8902038132807363e-05, "loss": 0.8271, "step": 7805, "task_loss": 1.823226809501648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9765980243682861, "epoch": 6.6, "learning_rate": 1.889734197426505e-05, "loss": 0.994, "step": 7806, "task_loss": 1.4691439867019653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8120265007019043, "epoch": 6.6, "learning_rate": 1.889264581572274e-05, "loss": 0.7498, "step": 7807, "task_loss": 1.2737910747528076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7573387622833252, "epoch": 6.6, "learning_rate": 1.8887949657180426e-05, "loss": 0.607, "step": 7808, "task_loss": 0.36787307262420654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6707810163497925, "epoch": 6.6, "learning_rate": 1.8883253498638115e-05, "loss": 0.6547, "step": 7809, "task_loss": 1.5368835926055908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8662512302398682, "epoch": 6.6, "learning_rate": 1.8878557340095805e-05, "loss": 0.707, "step": 7810, "task_loss": 0.48592421412467957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6783595681190491, "epoch": 6.6, "learning_rate": 1.887386118155349e-05, "loss": 0.7737, "step": 7811, "task_loss": 0.9964863657951355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5295761823654175, "epoch": 6.6, "learning_rate": 1.8869165023011178e-05, "loss": 0.6398, "step": 7812, "task_loss": 0.5722056031227112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6706504225730896, "epoch": 6.6, "learning_rate": 1.8864468864468864e-05, "loss": 0.663, "step": 7813, "task_loss": 0.5051553249359131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49735569953918457, "epoch": 6.6, "learning_rate": 1.8859772705926554e-05, "loss": 0.5882, "step": 7814, "task_loss": 0.09332499653100967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5509010553359985, "epoch": 6.61, "learning_rate": 1.885507654738424e-05, "loss": 0.7669, "step": 7815, "task_loss": 1.321014642715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48622381687164307, "epoch": 6.61, "learning_rate": 1.885038038884193e-05, "loss": 0.5479, "step": 7816, "task_loss": 0.10827895998954773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7020886540412903, "epoch": 6.61, "learning_rate": 1.8845684230299616e-05, "loss": 0.6131, "step": 7817, "task_loss": 1.8020190000534058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.569518506526947, "epoch": 6.61, "learning_rate": 1.8840988071757303e-05, "loss": 0.5826, "step": 7818, "task_loss": 0.5740954279899597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6012476682662964, "epoch": 6.61, "learning_rate": 1.883629191321499e-05, "loss": 0.5253, "step": 7819, "task_loss": 0.5411742925643921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5878693461418152, "epoch": 6.61, "learning_rate": 1.883159575467268e-05, "loss": 0.6703, "step": 7820, "task_loss": 1.0136882066726685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7688875198364258, "epoch": 6.61, "learning_rate": 1.8826899596130365e-05, "loss": 0.708, "step": 7821, "task_loss": 0.5600406527519226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7223972082138062, "epoch": 6.61, "learning_rate": 1.8822203437588055e-05, "loss": 0.7645, "step": 7822, "task_loss": 1.0445244312286377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5804098844528198, "epoch": 6.61, "learning_rate": 1.881750727904574e-05, "loss": 0.7467, "step": 7823, "task_loss": 0.5902911424636841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6116171479225159, "epoch": 6.61, "learning_rate": 1.8812811120503427e-05, "loss": 0.6299, "step": 7824, "task_loss": 0.8706544041633606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3333415389060974, "epoch": 6.61, "learning_rate": 1.8808114961961117e-05, "loss": 0.4031, "step": 7825, "task_loss": 0.19836963713169098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5704915523529053, "epoch": 6.61, "learning_rate": 1.8803418803418804e-05, "loss": 0.7307, "step": 7826, "task_loss": 1.2697862386703491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1924935579299927, "epoch": 6.62, "learning_rate": 1.8798722644876493e-05, "loss": 1.0402, "step": 7827, "task_loss": 1.1625478267669678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9154330492019653, "epoch": 6.62, "learning_rate": 1.879402648633418e-05, "loss": 0.8229, "step": 7828, "task_loss": 1.7832838296890259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7465558052062988, "epoch": 6.62, "learning_rate": 1.878933032779187e-05, "loss": 0.8399, "step": 7829, "task_loss": 0.6346486806869507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5442193746566772, "epoch": 6.62, "learning_rate": 1.8784634169249556e-05, "loss": 0.7907, "step": 7830, "task_loss": 0.4566959738731384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.727375328540802, "epoch": 6.62, "learning_rate": 1.8779938010707242e-05, "loss": 0.6436, "step": 7831, "task_loss": 0.8603447675704956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.195192575454712, "epoch": 6.62, "learning_rate": 1.877524185216493e-05, "loss": 0.736, "step": 7832, "task_loss": 0.4309762120246887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2756239175796509, "epoch": 6.62, "learning_rate": 1.8770545693622618e-05, "loss": 0.6365, "step": 7833, "task_loss": 0.7366514205932617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7406010031700134, "epoch": 6.62, "learning_rate": 1.8765849535080304e-05, "loss": 0.8529, "step": 7834, "task_loss": 0.6247768402099609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5907922983169556, "epoch": 6.62, "learning_rate": 1.8761153376537994e-05, "loss": 0.7895, "step": 7835, "task_loss": 1.0220012664794922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7191261649131775, "epoch": 6.62, "learning_rate": 1.875645721799568e-05, "loss": 0.7889, "step": 7836, "task_loss": 0.9463446140289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2336615324020386, "epoch": 6.62, "learning_rate": 1.8751761059453367e-05, "loss": 0.8606, "step": 7837, "task_loss": 0.7981645464897156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.017031192779541, "epoch": 6.63, "learning_rate": 1.8747064900911053e-05, "loss": 0.7408, "step": 7838, "task_loss": 0.42498043179512024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7098329067230225, "epoch": 6.63, "learning_rate": 1.8742368742368743e-05, "loss": 0.7503, "step": 7839, "task_loss": 1.3833568096160889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9685272574424744, "epoch": 6.63, "learning_rate": 1.8737672583826433e-05, "loss": 0.715, "step": 7840, "task_loss": 0.8682403564453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.18337582051753998, "epoch": 6.63, "learning_rate": 1.873297642528412e-05, "loss": 0.5422, "step": 7841, "task_loss": 0.05008570849895477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6054979562759399, "epoch": 6.63, "learning_rate": 1.872828026674181e-05, "loss": 0.7878, "step": 7842, "task_loss": 0.9729222059249878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8246788382530212, "epoch": 6.63, "learning_rate": 1.8723584108199495e-05, "loss": 0.7773, "step": 7843, "task_loss": 1.0098485946655273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7287330627441406, "epoch": 6.63, "learning_rate": 1.871888794965718e-05, "loss": 0.6405, "step": 7844, "task_loss": 1.2139191627502441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.918885350227356, "epoch": 6.63, "learning_rate": 1.8714191791114868e-05, "loss": 0.8636, "step": 7845, "task_loss": 0.9346209764480591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9642723798751831, "epoch": 6.63, "learning_rate": 1.8709495632572557e-05, "loss": 0.9287, "step": 7846, "task_loss": 2.3928534984588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0616816282272339, "epoch": 6.63, "learning_rate": 1.8704799474030244e-05, "loss": 0.6854, "step": 7847, "task_loss": 0.6230658888816833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5114741921424866, "epoch": 6.63, "learning_rate": 1.8700103315487934e-05, "loss": 0.5382, "step": 7848, "task_loss": 0.5449884533882141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6017237305641174, "epoch": 6.63, "learning_rate": 1.869540715694562e-05, "loss": 0.7467, "step": 7849, "task_loss": 0.6700220704078674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5118065476417542, "epoch": 6.64, "learning_rate": 1.8690710998403306e-05, "loss": 0.6029, "step": 7850, "task_loss": 1.098567247390747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0044565200805664, "epoch": 6.64, "learning_rate": 1.8686014839860993e-05, "loss": 0.7462, "step": 7851, "task_loss": 0.4505966603755951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1425902843475342, "epoch": 6.64, "learning_rate": 1.8681318681318682e-05, "loss": 0.8703, "step": 7852, "task_loss": 1.5588202476501465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.154496669769287, "epoch": 6.64, "learning_rate": 1.867662252277637e-05, "loss": 0.7315, "step": 7853, "task_loss": 0.9514247179031372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6953299641609192, "epoch": 6.64, "learning_rate": 1.867192636423406e-05, "loss": 0.7467, "step": 7854, "task_loss": 0.10017350316047668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8484282493591309, "epoch": 6.64, "learning_rate": 1.8667230205691748e-05, "loss": 0.6054, "step": 7855, "task_loss": 0.9276469349861145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7772078514099121, "epoch": 6.64, "learning_rate": 1.866253404714943e-05, "loss": 0.8689, "step": 7856, "task_loss": 1.471656322479248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5349969267845154, "epoch": 6.64, "learning_rate": 1.865783788860712e-05, "loss": 0.5137, "step": 7857, "task_loss": 0.3225862383842468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7518267631530762, "epoch": 6.64, "learning_rate": 1.8653141730064807e-05, "loss": 0.7698, "step": 7858, "task_loss": 0.16990090906620026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.038450002670288, "epoch": 6.64, "learning_rate": 1.8648445571522497e-05, "loss": 0.7956, "step": 7859, "task_loss": 1.3283789157867432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6540305614471436, "epoch": 6.64, "learning_rate": 1.8643749412980183e-05, "loss": 0.6496, "step": 7860, "task_loss": 1.014112949371338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3812454342842102, "epoch": 6.64, "learning_rate": 1.8639053254437873e-05, "loss": 0.5239, "step": 7861, "task_loss": 0.07563161849975586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7884107232093811, "epoch": 6.65, "learning_rate": 1.863435709589556e-05, "loss": 0.8871, "step": 7862, "task_loss": 0.8371365070343018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5028432607650757, "epoch": 6.65, "learning_rate": 1.8629660937353246e-05, "loss": 0.6914, "step": 7863, "task_loss": 0.7209850549697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6658700108528137, "epoch": 6.65, "learning_rate": 1.8624964778810932e-05, "loss": 0.6516, "step": 7864, "task_loss": 0.5867996215820312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.044297695159912, "epoch": 6.65, "learning_rate": 1.862026862026862e-05, "loss": 0.7, "step": 7865, "task_loss": 1.3117907047271729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5429205894470215, "epoch": 6.65, "learning_rate": 1.8615572461726308e-05, "loss": 0.6634, "step": 7866, "task_loss": 0.4262802302837372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5182880163192749, "epoch": 6.65, "learning_rate": 1.8610876303183998e-05, "loss": 0.559, "step": 7867, "task_loss": 0.9898430109024048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6334108710289001, "epoch": 6.65, "learning_rate": 1.8606180144641684e-05, "loss": 0.6794, "step": 7868, "task_loss": 0.623222827911377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36316877603530884, "epoch": 6.65, "learning_rate": 1.860148398609937e-05, "loss": 0.5834, "step": 7869, "task_loss": 0.5945814847946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4099796414375305, "epoch": 6.65, "learning_rate": 1.8596787827557057e-05, "loss": 0.7464, "step": 7870, "task_loss": 1.4170829057693481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9255796670913696, "epoch": 6.65, "learning_rate": 1.8592091669014746e-05, "loss": 0.828, "step": 7871, "task_loss": 1.1185719966888428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.375649094581604, "epoch": 6.65, "learning_rate": 1.8587395510472436e-05, "loss": 0.6816, "step": 7872, "task_loss": 1.1951087713241577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8021020293235779, "epoch": 6.65, "learning_rate": 1.8582699351930122e-05, "loss": 0.6266, "step": 7873, "task_loss": 1.1499820947647095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.968069314956665, "epoch": 6.66, "learning_rate": 1.8578003193387812e-05, "loss": 0.8216, "step": 7874, "task_loss": 0.6735106110572815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9532564282417297, "epoch": 6.66, "learning_rate": 1.8573307034845495e-05, "loss": 0.8697, "step": 7875, "task_loss": 0.9637805819511414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6173240542411804, "epoch": 6.66, "learning_rate": 1.8568610876303185e-05, "loss": 0.6173, "step": 7876, "task_loss": 0.3193662464618683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5563900470733643, "epoch": 6.66, "learning_rate": 1.856391471776087e-05, "loss": 0.5993, "step": 7877, "task_loss": 0.9143386483192444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9904193878173828, "epoch": 6.66, "learning_rate": 1.855921855921856e-05, "loss": 0.7426, "step": 7878, "task_loss": 1.2717500925064087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.893269419670105, "epoch": 6.66, "learning_rate": 1.8554522400676247e-05, "loss": 0.6952, "step": 7879, "task_loss": 1.0186412334442139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7463105320930481, "epoch": 6.66, "learning_rate": 1.8549826242133937e-05, "loss": 0.6557, "step": 7880, "task_loss": 0.24177910387516022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7125462889671326, "epoch": 6.66, "learning_rate": 1.8545130083591623e-05, "loss": 0.7303, "step": 7881, "task_loss": 0.9803380966186523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5986932516098022, "epoch": 6.66, "learning_rate": 1.854043392504931e-05, "loss": 0.569, "step": 7882, "task_loss": 0.3532584607601166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5056746006011963, "epoch": 6.66, "learning_rate": 1.8535737766506996e-05, "loss": 0.5772, "step": 7883, "task_loss": 0.4824450612068176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7879825234413147, "epoch": 6.66, "learning_rate": 1.8531041607964686e-05, "loss": 0.8449, "step": 7884, "task_loss": 0.5586627721786499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6972548961639404, "epoch": 6.66, "learning_rate": 1.8526345449422372e-05, "loss": 1.0153, "step": 7885, "task_loss": 1.4040547609329224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9667524099349976, "epoch": 6.67, "learning_rate": 1.8521649290880062e-05, "loss": 0.6395, "step": 7886, "task_loss": 1.2911752462387085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40651336312294006, "epoch": 6.67, "learning_rate": 1.8516953132337748e-05, "loss": 0.6423, "step": 7887, "task_loss": 1.5284032821655273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5201629400253296, "epoch": 6.67, "learning_rate": 1.8512256973795435e-05, "loss": 0.786, "step": 7888, "task_loss": 0.8478072285652161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8067490458488464, "epoch": 6.67, "learning_rate": 1.8507560815253124e-05, "loss": 0.8156, "step": 7889, "task_loss": 1.1938022375106812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6006203293800354, "epoch": 6.67, "learning_rate": 1.850286465671081e-05, "loss": 0.5031, "step": 7890, "task_loss": 1.1408607959747314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5761297941207886, "epoch": 6.67, "learning_rate": 1.84981684981685e-05, "loss": 0.5076, "step": 7891, "task_loss": 0.16954344511032104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7096825838088989, "epoch": 6.67, "learning_rate": 1.8493472339626187e-05, "loss": 0.7061, "step": 7892, "task_loss": 0.4794929027557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4434164762496948, "epoch": 6.67, "learning_rate": 1.8488776181083876e-05, "loss": 0.8057, "step": 7893, "task_loss": 1.0954362154006958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7106372117996216, "epoch": 6.67, "learning_rate": 1.8484080022541563e-05, "loss": 0.9266, "step": 7894, "task_loss": 1.4168815612792969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8033589124679565, "epoch": 6.67, "learning_rate": 1.847938386399925e-05, "loss": 0.6516, "step": 7895, "task_loss": 0.8186233639717102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2552391290664673, "epoch": 6.67, "learning_rate": 1.8474687705456935e-05, "loss": 0.5614, "step": 7896, "task_loss": 0.5207622647285461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0017409324645996, "epoch": 6.67, "learning_rate": 1.8469991546914625e-05, "loss": 0.8502, "step": 7897, "task_loss": 0.9991342425346375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5196003913879395, "epoch": 6.68, "learning_rate": 1.846529538837231e-05, "loss": 0.6412, "step": 7898, "task_loss": 0.7532137632369995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6942334771156311, "epoch": 6.68, "learning_rate": 1.846059922983e-05, "loss": 0.6951, "step": 7899, "task_loss": 1.0342955589294434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4767569601535797, "epoch": 6.68, "learning_rate": 1.8455903071287688e-05, "loss": 0.6507, "step": 7900, "task_loss": 0.33405637741088867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6834913492202759, "epoch": 6.68, "learning_rate": 1.8451206912745374e-05, "loss": 0.5266, "step": 7901, "task_loss": 0.44944554567337036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.795102596282959, "epoch": 6.68, "learning_rate": 1.8446510754203064e-05, "loss": 0.6234, "step": 7902, "task_loss": 0.8294305205345154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36145538091659546, "epoch": 6.68, "learning_rate": 1.844181459566075e-05, "loss": 0.5852, "step": 7903, "task_loss": 0.7357611656188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5224359631538391, "epoch": 6.68, "learning_rate": 1.843711843711844e-05, "loss": 0.8215, "step": 7904, "task_loss": 1.740600824356079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5361582040786743, "epoch": 6.68, "learning_rate": 1.8432422278576126e-05, "loss": 0.5759, "step": 7905, "task_loss": 1.1038775444030762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46315792202949524, "epoch": 6.68, "learning_rate": 1.8427726120033816e-05, "loss": 0.7691, "step": 7906, "task_loss": 0.8215778470039368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5847641229629517, "epoch": 6.68, "learning_rate": 1.84230299614915e-05, "loss": 0.5499, "step": 7907, "task_loss": 0.5280740261077881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0607234239578247, "epoch": 6.68, "learning_rate": 1.841833380294919e-05, "loss": 0.6189, "step": 7908, "task_loss": 0.5141946077346802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4492446780204773, "epoch": 6.69, "learning_rate": 1.8413637644406875e-05, "loss": 0.6449, "step": 7909, "task_loss": 0.6091643571853638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5606503486633301, "epoch": 6.69, "learning_rate": 1.8408941485864564e-05, "loss": 0.6278, "step": 7910, "task_loss": 0.8519642353057861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5006494522094727, "epoch": 6.69, "learning_rate": 1.840424532732225e-05, "loss": 0.5528, "step": 7911, "task_loss": 0.4064910411834717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7476399540901184, "epoch": 6.69, "learning_rate": 1.839954916877994e-05, "loss": 0.5724, "step": 7912, "task_loss": 0.7460187077522278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7440192699432373, "epoch": 6.69, "learning_rate": 1.8394853010237627e-05, "loss": 0.5987, "step": 7913, "task_loss": 0.5108507871627808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6471086740493774, "epoch": 6.69, "learning_rate": 1.8390156851695313e-05, "loss": 0.8259, "step": 7914, "task_loss": 0.9180610775947571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45447859168052673, "epoch": 6.69, "learning_rate": 1.8385460693153e-05, "loss": 0.4492, "step": 7915, "task_loss": 1.2165700197219849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4651675224304199, "epoch": 6.69, "learning_rate": 1.838076453461069e-05, "loss": 0.5739, "step": 7916, "task_loss": 0.6536151766777039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6578589677810669, "epoch": 6.69, "learning_rate": 1.837606837606838e-05, "loss": 0.7741, "step": 7917, "task_loss": 0.43819135427474976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.522779107093811, "epoch": 6.69, "learning_rate": 1.8371372217526065e-05, "loss": 0.7505, "step": 7918, "task_loss": 0.20634883642196655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6950886249542236, "epoch": 6.69, "learning_rate": 1.8366676058983752e-05, "loss": 0.5553, "step": 7919, "task_loss": 0.7146080732345581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5709637403488159, "epoch": 6.69, "learning_rate": 1.8361979900441438e-05, "loss": 0.7515, "step": 7920, "task_loss": 0.9862561225891113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0511484146118164, "epoch": 6.7, "learning_rate": 1.8357283741899128e-05, "loss": 0.7478, "step": 7921, "task_loss": 0.7901477813720703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7660884857177734, "epoch": 6.7, "learning_rate": 1.8352587583356814e-05, "loss": 0.7075, "step": 7922, "task_loss": 0.17087645828723907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6559551954269409, "epoch": 6.7, "learning_rate": 1.8347891424814504e-05, "loss": 0.5412, "step": 7923, "task_loss": 0.9207844138145447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.647952139377594, "epoch": 6.7, "learning_rate": 1.834319526627219e-05, "loss": 0.7633, "step": 7924, "task_loss": 0.31597813963890076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.600567638874054, "epoch": 6.7, "learning_rate": 1.833849910772988e-05, "loss": 0.5374, "step": 7925, "task_loss": 0.7412456274032593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48294299840927124, "epoch": 6.7, "learning_rate": 1.8333802949187566e-05, "loss": 0.7038, "step": 7926, "task_loss": 0.6512618064880371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9077585935592651, "epoch": 6.7, "learning_rate": 1.8329106790645253e-05, "loss": 0.58, "step": 7927, "task_loss": 0.818070113658905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7654509544372559, "epoch": 6.7, "learning_rate": 1.832441063210294e-05, "loss": 0.5997, "step": 7928, "task_loss": 0.6148888468742371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.752141535282135, "epoch": 6.7, "learning_rate": 1.831971447356063e-05, "loss": 0.7585, "step": 7929, "task_loss": 0.5431429147720337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5761847496032715, "epoch": 6.7, "learning_rate": 1.8315018315018315e-05, "loss": 0.9759, "step": 7930, "task_loss": 0.9251492023468018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4284417927265167, "epoch": 6.7, "learning_rate": 1.8310322156476005e-05, "loss": 0.6786, "step": 7931, "task_loss": 0.4521709084510803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6372388601303101, "epoch": 6.7, "learning_rate": 1.830562599793369e-05, "loss": 0.7656, "step": 7932, "task_loss": 0.6418814659118652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7378064393997192, "epoch": 6.71, "learning_rate": 1.8300929839391377e-05, "loss": 0.7152, "step": 7933, "task_loss": 0.7725093960762024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3550605773925781, "epoch": 6.71, "learning_rate": 1.8296233680849067e-05, "loss": 0.5645, "step": 7934, "task_loss": 0.20052047073841095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4952535331249237, "epoch": 6.71, "learning_rate": 1.8291537522306753e-05, "loss": 0.728, "step": 7935, "task_loss": 1.3379993438720703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8407859802246094, "epoch": 6.71, "learning_rate": 1.8286841363764443e-05, "loss": 0.6271, "step": 7936, "task_loss": 1.1200388669967651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5017493963241577, "epoch": 6.71, "learning_rate": 1.828214520522213e-05, "loss": 0.6544, "step": 7937, "task_loss": 1.018393635749817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7072032690048218, "epoch": 6.71, "learning_rate": 1.827744904667982e-05, "loss": 0.5143, "step": 7938, "task_loss": 0.22628264129161835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1211700439453125, "epoch": 6.71, "learning_rate": 1.8272752888137502e-05, "loss": 0.6771, "step": 7939, "task_loss": 1.216538667678833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3882414102554321, "epoch": 6.71, "learning_rate": 1.8268056729595192e-05, "loss": 0.8722, "step": 7940, "task_loss": 1.213882565498352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.732886016368866, "epoch": 6.71, "learning_rate": 1.8263360571052878e-05, "loss": 0.9278, "step": 7941, "task_loss": 1.143445611000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46582961082458496, "epoch": 6.71, "learning_rate": 1.8258664412510568e-05, "loss": 0.463, "step": 7942, "task_loss": 0.840313196182251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7372859716415405, "epoch": 6.71, "learning_rate": 1.8253968253968254e-05, "loss": 0.6092, "step": 7943, "task_loss": 0.642926812171936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39185595512390137, "epoch": 6.71, "learning_rate": 1.8249272095425944e-05, "loss": 0.8368, "step": 7944, "task_loss": 0.15820704400539398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6259014010429382, "epoch": 6.72, "learning_rate": 1.824457593688363e-05, "loss": 0.7203, "step": 7945, "task_loss": 0.6138855814933777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6734684705734253, "epoch": 6.72, "learning_rate": 1.8239879778341317e-05, "loss": 0.7938, "step": 7946, "task_loss": 0.7729646563529968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4140245318412781, "epoch": 6.72, "learning_rate": 1.8235183619799003e-05, "loss": 0.5701, "step": 7947, "task_loss": 0.4363038241863251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8473676443099976, "epoch": 6.72, "learning_rate": 1.8230487461256693e-05, "loss": 0.6726, "step": 7948, "task_loss": 1.1091704368591309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35762766003608704, "epoch": 6.72, "learning_rate": 1.8225791302714383e-05, "loss": 0.4719, "step": 7949, "task_loss": 0.4542601704597473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4779626131057739, "epoch": 6.72, "learning_rate": 1.822109514417207e-05, "loss": 0.5778, "step": 7950, "task_loss": 0.44276759028434753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6285742521286011, "epoch": 6.72, "learning_rate": 1.8216398985629755e-05, "loss": 0.7203, "step": 7951, "task_loss": 1.4765084981918335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7916806936264038, "epoch": 6.72, "learning_rate": 1.821170282708744e-05, "loss": 0.6717, "step": 7952, "task_loss": 0.7570236921310425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5416487455368042, "epoch": 6.72, "learning_rate": 1.820700666854513e-05, "loss": 0.6215, "step": 7953, "task_loss": 0.5492072701454163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4159480333328247, "epoch": 6.72, "learning_rate": 1.8202310510002818e-05, "loss": 0.5407, "step": 7954, "task_loss": 0.14076299965381622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7908387780189514, "epoch": 6.72, "learning_rate": 1.8197614351460507e-05, "loss": 0.7035, "step": 7955, "task_loss": 0.7409909963607788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4786747694015503, "epoch": 6.72, "learning_rate": 1.8192918192918194e-05, "loss": 0.5884, "step": 7956, "task_loss": 0.734045147895813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20350435376167297, "epoch": 6.73, "learning_rate": 1.8188222034375883e-05, "loss": 0.8247, "step": 7957, "task_loss": 0.5917913913726807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6905925273895264, "epoch": 6.73, "learning_rate": 1.8183525875833566e-05, "loss": 0.6603, "step": 7958, "task_loss": 0.8659093379974365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7876279354095459, "epoch": 6.73, "learning_rate": 1.8178829717291256e-05, "loss": 0.6575, "step": 7959, "task_loss": 0.920160710811615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4959943890571594, "epoch": 6.73, "learning_rate": 1.8174133558748942e-05, "loss": 0.6051, "step": 7960, "task_loss": 0.5784982442855835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8834877014160156, "epoch": 6.73, "learning_rate": 1.8169437400206632e-05, "loss": 0.7876, "step": 7961, "task_loss": 0.8445250391960144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5220768451690674, "epoch": 6.73, "learning_rate": 1.816474124166432e-05, "loss": 0.6138, "step": 7962, "task_loss": 0.03741728141903877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8020431399345398, "epoch": 6.73, "learning_rate": 1.8160045083122008e-05, "loss": 0.4679, "step": 7963, "task_loss": 0.5293828845024109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9985148906707764, "epoch": 6.73, "learning_rate": 1.8155348924579695e-05, "loss": 0.6026, "step": 7964, "task_loss": 0.6721194386482239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48594018816947937, "epoch": 6.73, "learning_rate": 1.815065276603738e-05, "loss": 0.5099, "step": 7965, "task_loss": 0.8812195658683777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3626821041107178, "epoch": 6.73, "learning_rate": 1.814595660749507e-05, "loss": 0.679, "step": 7966, "task_loss": 0.5200273990631104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6759179830551147, "epoch": 6.73, "learning_rate": 1.8141260448952757e-05, "loss": 0.6171, "step": 7967, "task_loss": 0.718386709690094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45813196897506714, "epoch": 6.73, "learning_rate": 1.8136564290410447e-05, "loss": 0.6606, "step": 7968, "task_loss": 0.9075406193733215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7210231423377991, "epoch": 6.74, "learning_rate": 1.8131868131868133e-05, "loss": 0.5717, "step": 7969, "task_loss": 1.0642486810684204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7107598781585693, "epoch": 6.74, "learning_rate": 1.812717197332582e-05, "loss": 0.6304, "step": 7970, "task_loss": 0.49486109614372253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3140372037887573, "epoch": 6.74, "learning_rate": 1.8122475814783506e-05, "loss": 0.575, "step": 7971, "task_loss": 0.4729804992675781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0046931505203247, "epoch": 6.74, "learning_rate": 1.8117779656241195e-05, "loss": 0.8368, "step": 7972, "task_loss": 0.434394896030426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8695248365402222, "epoch": 6.74, "learning_rate": 1.8113083497698882e-05, "loss": 0.7113, "step": 7973, "task_loss": 1.3541882038116455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8894485235214233, "epoch": 6.74, "learning_rate": 1.810838733915657e-05, "loss": 0.8444, "step": 7974, "task_loss": 0.7404197454452515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4245544672012329, "epoch": 6.74, "learning_rate": 1.8103691180614258e-05, "loss": 0.7218, "step": 7975, "task_loss": 0.18259556591510773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7358359098434448, "epoch": 6.74, "learning_rate": 1.8098995022071948e-05, "loss": 0.7188, "step": 7976, "task_loss": 0.813023567199707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5175817012786865, "epoch": 6.74, "learning_rate": 1.8094298863529634e-05, "loss": 0.6881, "step": 7977, "task_loss": 0.5006885528564453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6883141994476318, "epoch": 6.74, "learning_rate": 1.808960270498732e-05, "loss": 0.856, "step": 7978, "task_loss": 0.8819331526756287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0683388710021973, "epoch": 6.74, "learning_rate": 1.808490654644501e-05, "loss": 0.8585, "step": 7979, "task_loss": 0.6888999342918396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4435354471206665, "epoch": 6.75, "learning_rate": 1.8080210387902696e-05, "loss": 0.6024, "step": 7980, "task_loss": 0.6040195822715759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8698724508285522, "epoch": 6.75, "learning_rate": 1.8075514229360386e-05, "loss": 0.7138, "step": 7981, "task_loss": 0.8517537713050842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4946536421775818, "epoch": 6.75, "learning_rate": 1.8070818070818072e-05, "loss": 0.6099, "step": 7982, "task_loss": 0.31087616086006165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7112031579017639, "epoch": 6.75, "learning_rate": 1.806612191227576e-05, "loss": 1.0473, "step": 7983, "task_loss": 0.7432671189308167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31455132365226746, "epoch": 6.75, "learning_rate": 1.8061425753733445e-05, "loss": 0.7274, "step": 7984, "task_loss": 0.19216325879096985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4368973970413208, "epoch": 6.75, "learning_rate": 1.8056729595191135e-05, "loss": 0.6274, "step": 7985, "task_loss": 0.49522876739501953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4263665974140167, "epoch": 6.75, "learning_rate": 1.805203343664882e-05, "loss": 0.7326, "step": 7986, "task_loss": 1.000097632408142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5721075534820557, "epoch": 6.75, "learning_rate": 1.804733727810651e-05, "loss": 0.8102, "step": 7987, "task_loss": 0.5954939126968384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6449824571609497, "epoch": 6.75, "learning_rate": 1.8042641119564197e-05, "loss": 0.5692, "step": 7988, "task_loss": 1.3372905254364014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9555219411849976, "epoch": 6.75, "learning_rate": 1.8037944961021887e-05, "loss": 0.7545, "step": 7989, "task_loss": 0.6906490325927734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1189849376678467, "epoch": 6.75, "learning_rate": 1.803324880247957e-05, "loss": 0.898, "step": 7990, "task_loss": 1.4956341981887817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.666096568107605, "epoch": 6.75, "learning_rate": 1.802855264393726e-05, "loss": 0.6194, "step": 7991, "task_loss": 0.23243533074855804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8120143413543701, "epoch": 6.76, "learning_rate": 1.8023856485394946e-05, "loss": 0.7433, "step": 7992, "task_loss": 0.7097808122634888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.880958080291748, "epoch": 6.76, "learning_rate": 1.8019160326852636e-05, "loss": 0.8294, "step": 7993, "task_loss": 0.821125328540802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4344630837440491, "epoch": 6.76, "learning_rate": 1.8014464168310325e-05, "loss": 0.5122, "step": 7994, "task_loss": 0.1951906681060791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7719666957855225, "epoch": 6.76, "learning_rate": 1.8009768009768012e-05, "loss": 0.8123, "step": 7995, "task_loss": 2.1187140941619873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7557183504104614, "epoch": 6.76, "learning_rate": 1.8005071851225698e-05, "loss": 0.5947, "step": 7996, "task_loss": 0.4008740186691284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6425890922546387, "epoch": 6.76, "learning_rate": 1.8000375692683384e-05, "loss": 0.6645, "step": 7997, "task_loss": 0.8010776042938232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.027559757232666, "epoch": 6.76, "learning_rate": 1.7995679534141074e-05, "loss": 0.7021, "step": 7998, "task_loss": 1.4539543390274048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.069153070449829, "epoch": 6.76, "learning_rate": 1.799098337559876e-05, "loss": 0.8323, "step": 7999, "task_loss": 0.5502726435661316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6071944832801819, "epoch": 6.76, "learning_rate": 1.798628721705645e-05, "loss": 0.6901, "step": 8000, "task_loss": 0.44921377301216125 }, { "epoch": 6.76, "eval_accuracy": 0.8946534653465347, "eval_loss": 0.4360075891017914, "eval_runtime": 224.462, "eval_samples_per_second": 112.491, "eval_steps_per_second": 0.882, "step": 8000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7147499322891235, "epoch": 6.76, "learning_rate": 1.7981591058514137e-05, "loss": 0.8883, "step": 8001, "task_loss": 1.2215403318405151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0802147388458252, "epoch": 6.76, "learning_rate": 1.7976894899971823e-05, "loss": 0.6785, "step": 8002, "task_loss": 0.6150112152099609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.21781587600708, "epoch": 6.76, "learning_rate": 1.797219874142951e-05, "loss": 0.6843, "step": 8003, "task_loss": 1.4609410762786865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43777745962142944, "epoch": 6.77, "learning_rate": 1.79675025828872e-05, "loss": 0.5456, "step": 8004, "task_loss": 0.5899250507354736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.887982189655304, "epoch": 6.77, "learning_rate": 1.7962806424344885e-05, "loss": 0.9674, "step": 8005, "task_loss": 1.1982709169387817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4891730844974518, "epoch": 6.77, "learning_rate": 1.7958110265802575e-05, "loss": 0.7109, "step": 8006, "task_loss": 0.8818010091781616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5106393694877625, "epoch": 6.77, "learning_rate": 1.795341410726026e-05, "loss": 0.6777, "step": 8007, "task_loss": 0.7671254873275757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7531392574310303, "epoch": 6.77, "learning_rate": 1.794871794871795e-05, "loss": 0.6981, "step": 8008, "task_loss": 0.48354974389076233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7841693162918091, "epoch": 6.77, "learning_rate": 1.7944021790175637e-05, "loss": 0.7599, "step": 8009, "task_loss": 0.7773182392120361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6043384075164795, "epoch": 6.77, "learning_rate": 1.7939325631633324e-05, "loss": 0.6388, "step": 8010, "task_loss": 0.624013364315033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6658207178115845, "epoch": 6.77, "learning_rate": 1.7934629473091014e-05, "loss": 0.7151, "step": 8011, "task_loss": 0.5321700572967529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6693577170372009, "epoch": 6.77, "learning_rate": 1.79299333145487e-05, "loss": 0.7578, "step": 8012, "task_loss": 1.1395026445388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5998744368553162, "epoch": 6.77, "learning_rate": 1.792523715600639e-05, "loss": 0.7462, "step": 8013, "task_loss": 0.3999220132827759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5227078795433044, "epoch": 6.77, "learning_rate": 1.7920540997464076e-05, "loss": 0.5717, "step": 8014, "task_loss": 1.2674517631530762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5787570476531982, "epoch": 6.77, "learning_rate": 1.7915844838921762e-05, "loss": 0.7233, "step": 8015, "task_loss": 0.8126749396324158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7172110080718994, "epoch": 6.78, "learning_rate": 1.791114868037945e-05, "loss": 0.7002, "step": 8016, "task_loss": 0.9083460569381714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5829256176948547, "epoch": 6.78, "learning_rate": 1.790645252183714e-05, "loss": 0.7496, "step": 8017, "task_loss": 0.8680709004402161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7751043438911438, "epoch": 6.78, "learning_rate": 1.7901756363294825e-05, "loss": 0.6544, "step": 8018, "task_loss": 1.9333314895629883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5033655762672424, "epoch": 6.78, "learning_rate": 1.7897060204752514e-05, "loss": 0.6191, "step": 8019, "task_loss": 0.38029056787490845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8208957314491272, "epoch": 6.78, "learning_rate": 1.78923640462102e-05, "loss": 0.5755, "step": 8020, "task_loss": 0.9690202474594116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6793891787528992, "epoch": 6.78, "learning_rate": 1.788766788766789e-05, "loss": 0.8338, "step": 8021, "task_loss": 0.6488433480262756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7614554166793823, "epoch": 6.78, "learning_rate": 1.7882971729125573e-05, "loss": 0.79, "step": 8022, "task_loss": 1.3003549575805664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35664159059524536, "epoch": 6.78, "learning_rate": 1.7878275570583263e-05, "loss": 0.6384, "step": 8023, "task_loss": 0.13410291075706482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7318965196609497, "epoch": 6.78, "learning_rate": 1.787357941204095e-05, "loss": 0.6513, "step": 8024, "task_loss": 0.832478940486908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29930248856544495, "epoch": 6.78, "learning_rate": 1.786888325349864e-05, "loss": 0.5064, "step": 8025, "task_loss": 0.4078892469406128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7408538460731506, "epoch": 6.78, "learning_rate": 1.786418709495633e-05, "loss": 0.5627, "step": 8026, "task_loss": 1.0686321258544922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7140425443649292, "epoch": 6.78, "learning_rate": 1.7859490936414015e-05, "loss": 0.6513, "step": 8027, "task_loss": 1.3702499866485596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4995856285095215, "epoch": 6.79, "learning_rate": 1.78547947778717e-05, "loss": 0.6022, "step": 8028, "task_loss": 0.785309374332428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8976219892501831, "epoch": 6.79, "learning_rate": 1.7850098619329388e-05, "loss": 0.6563, "step": 8029, "task_loss": 1.3166918754577637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5022231340408325, "epoch": 6.79, "learning_rate": 1.7845402460787078e-05, "loss": 0.6113, "step": 8030, "task_loss": 0.8298333287239075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9448496103286743, "epoch": 6.79, "learning_rate": 1.7840706302244764e-05, "loss": 0.7507, "step": 8031, "task_loss": 1.5081721544265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6059993505477905, "epoch": 6.79, "learning_rate": 1.7836010143702454e-05, "loss": 0.5416, "step": 8032, "task_loss": 0.5592650771141052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5843275189399719, "epoch": 6.79, "learning_rate": 1.783131398516014e-05, "loss": 0.5887, "step": 8033, "task_loss": 0.8235524296760559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5470760464668274, "epoch": 6.79, "learning_rate": 1.7826617826617826e-05, "loss": 0.7689, "step": 8034, "task_loss": 0.4674065411090851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5398311614990234, "epoch": 6.79, "learning_rate": 1.7821921668075513e-05, "loss": 0.6136, "step": 8035, "task_loss": 0.891055166721344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5759966373443604, "epoch": 6.79, "learning_rate": 1.7817225509533203e-05, "loss": 0.6937, "step": 8036, "task_loss": 0.486447274684906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7485703825950623, "epoch": 6.79, "learning_rate": 1.781252935099089e-05, "loss": 0.6855, "step": 8037, "task_loss": 0.41339340806007385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8517266511917114, "epoch": 6.79, "learning_rate": 1.780783319244858e-05, "loss": 0.6539, "step": 8038, "task_loss": 0.7551946043968201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5453771948814392, "epoch": 6.79, "learning_rate": 1.7803137033906265e-05, "loss": 0.6201, "step": 8039, "task_loss": 0.9347697496414185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5737707614898682, "epoch": 6.8, "learning_rate": 1.7798440875363955e-05, "loss": 0.5233, "step": 8040, "task_loss": 0.19361069798469543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7031677961349487, "epoch": 6.8, "learning_rate": 1.779374471682164e-05, "loss": 0.7326, "step": 8041, "task_loss": 0.6400628089904785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8090164661407471, "epoch": 6.8, "learning_rate": 1.7789048558279327e-05, "loss": 0.7395, "step": 8042, "task_loss": 0.5944257378578186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4062674641609192, "epoch": 6.8, "learning_rate": 1.7784352399737017e-05, "loss": 0.6435, "step": 8043, "task_loss": 0.13752858340740204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.225386381149292, "epoch": 6.8, "learning_rate": 1.7779656241194703e-05, "loss": 0.6984, "step": 8044, "task_loss": 0.01736661046743393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.040738582611084, "epoch": 6.8, "learning_rate": 1.7774960082652393e-05, "loss": 0.6978, "step": 8045, "task_loss": 0.9021987915039062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8206679224967957, "epoch": 6.8, "learning_rate": 1.777026392411008e-05, "loss": 0.6526, "step": 8046, "task_loss": 0.7670672535896301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6493248343467712, "epoch": 6.8, "learning_rate": 1.7765567765567766e-05, "loss": 0.5243, "step": 8047, "task_loss": 0.4374203383922577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.598569393157959, "epoch": 6.8, "learning_rate": 1.7760871607025452e-05, "loss": 0.789, "step": 8048, "task_loss": 0.589989960193634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6627638936042786, "epoch": 6.8, "learning_rate": 1.7756175448483142e-05, "loss": 0.8226, "step": 8049, "task_loss": 0.44606295228004456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42342430353164673, "epoch": 6.8, "learning_rate": 1.7751479289940828e-05, "loss": 0.5911, "step": 8050, "task_loss": 1.020456314086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5541496276855469, "epoch": 6.81, "learning_rate": 1.7746783131398518e-05, "loss": 0.4415, "step": 8051, "task_loss": 0.6412481069564819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7104198932647705, "epoch": 6.81, "learning_rate": 1.7742086972856204e-05, "loss": 0.8084, "step": 8052, "task_loss": 0.3165763020515442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8273806571960449, "epoch": 6.81, "learning_rate": 1.773739081431389e-05, "loss": 0.7113, "step": 8053, "task_loss": 0.39404451847076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4942731261253357, "epoch": 6.81, "learning_rate": 1.7732694655771577e-05, "loss": 0.4811, "step": 8054, "task_loss": 0.2583726942539215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3511680364608765, "epoch": 6.81, "learning_rate": 1.7727998497229267e-05, "loss": 0.7273, "step": 8055, "task_loss": 1.3018125295639038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47184252738952637, "epoch": 6.81, "learning_rate": 1.7723302338686956e-05, "loss": 0.54, "step": 8056, "task_loss": 0.4830264449119568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5420556664466858, "epoch": 6.81, "learning_rate": 1.7718606180144643e-05, "loss": 0.6471, "step": 8057, "task_loss": 1.3466905355453491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5653663277626038, "epoch": 6.81, "learning_rate": 1.7713910021602333e-05, "loss": 0.4945, "step": 8058, "task_loss": 0.7536539435386658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5841782093048096, "epoch": 6.81, "learning_rate": 1.770921386306002e-05, "loss": 0.779, "step": 8059, "task_loss": 0.9353388547897339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6040916442871094, "epoch": 6.81, "learning_rate": 1.7704517704517705e-05, "loss": 0.8012, "step": 8060, "task_loss": 0.6842377781867981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1073062419891357, "epoch": 6.81, "learning_rate": 1.769982154597539e-05, "loss": 0.6706, "step": 8061, "task_loss": 0.615308403968811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5754783749580383, "epoch": 6.81, "learning_rate": 1.769512538743308e-05, "loss": 0.5478, "step": 8062, "task_loss": 0.30593782663345337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6428370475769043, "epoch": 6.82, "learning_rate": 1.7690429228890768e-05, "loss": 0.6452, "step": 8063, "task_loss": 0.41472890973091125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4180375337600708, "epoch": 6.82, "learning_rate": 1.7685733070348457e-05, "loss": 0.6795, "step": 8064, "task_loss": 0.12286385148763657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9844945669174194, "epoch": 6.82, "learning_rate": 1.7681036911806144e-05, "loss": 0.7363, "step": 8065, "task_loss": 0.4261656403541565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5611470341682434, "epoch": 6.82, "learning_rate": 1.767634075326383e-05, "loss": 0.4999, "step": 8066, "task_loss": 0.6153940558433533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5427695512771606, "epoch": 6.82, "learning_rate": 1.7671644594721516e-05, "loss": 0.6642, "step": 8067, "task_loss": 1.0396978855133057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.594118058681488, "epoch": 6.82, "learning_rate": 1.7666948436179206e-05, "loss": 0.5842, "step": 8068, "task_loss": 0.4946383237838745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6018342971801758, "epoch": 6.82, "learning_rate": 1.7662252277636892e-05, "loss": 0.6315, "step": 8069, "task_loss": 0.5936048030853271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6681912541389465, "epoch": 6.82, "learning_rate": 1.7657556119094582e-05, "loss": 0.6742, "step": 8070, "task_loss": 0.7092284560203552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0278332233428955, "epoch": 6.82, "learning_rate": 1.7652859960552272e-05, "loss": 0.856, "step": 8071, "task_loss": 1.7079899311065674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42253851890563965, "epoch": 6.82, "learning_rate": 1.7648163802009958e-05, "loss": 0.5287, "step": 8072, "task_loss": 0.5619276762008667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0920625925064087, "epoch": 6.82, "learning_rate": 1.7643467643467645e-05, "loss": 0.6795, "step": 8073, "task_loss": 1.2036529779434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46253782510757446, "epoch": 6.82, "learning_rate": 1.763877148492533e-05, "loss": 0.5801, "step": 8074, "task_loss": 0.5560187697410583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.335463285446167, "epoch": 6.83, "learning_rate": 1.763407532638302e-05, "loss": 0.6874, "step": 8075, "task_loss": 0.1339855045080185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49251237511634827, "epoch": 6.83, "learning_rate": 1.7629379167840707e-05, "loss": 0.5556, "step": 8076, "task_loss": 0.20216558873653412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5557873845100403, "epoch": 6.83, "learning_rate": 1.7624683009298397e-05, "loss": 0.6527, "step": 8077, "task_loss": 1.0100558996200562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6040736436843872, "epoch": 6.83, "learning_rate": 1.7619986850756083e-05, "loss": 0.5642, "step": 8078, "task_loss": 1.1811249256134033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4806763529777527, "epoch": 6.83, "learning_rate": 1.761529069221377e-05, "loss": 0.5244, "step": 8079, "task_loss": 0.4966632127761841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7543705701828003, "epoch": 6.83, "learning_rate": 1.7610594533671456e-05, "loss": 0.7559, "step": 8080, "task_loss": 1.329698920249939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5508150458335876, "epoch": 6.83, "learning_rate": 1.7605898375129145e-05, "loss": 0.7841, "step": 8081, "task_loss": 1.1822662353515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6501812934875488, "epoch": 6.83, "learning_rate": 1.7601202216586832e-05, "loss": 0.7941, "step": 8082, "task_loss": 0.3081994354724884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7510744333267212, "epoch": 6.83, "learning_rate": 1.759650605804452e-05, "loss": 0.7763, "step": 8083, "task_loss": 0.7243291735649109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.956336498260498, "epoch": 6.83, "learning_rate": 1.7591809899502208e-05, "loss": 0.6837, "step": 8084, "task_loss": 0.46162810921669006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5383470058441162, "epoch": 6.83, "learning_rate": 1.7587113740959894e-05, "loss": 0.681, "step": 8085, "task_loss": 0.4536074697971344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.784961998462677, "epoch": 6.83, "learning_rate": 1.7582417582417584e-05, "loss": 0.6094, "step": 8086, "task_loss": 0.33034834265708923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7045609354972839, "epoch": 6.84, "learning_rate": 1.757772142387527e-05, "loss": 0.7174, "step": 8087, "task_loss": 1.028638482093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5173611640930176, "epoch": 6.84, "learning_rate": 1.757302526533296e-05, "loss": 0.5549, "step": 8088, "task_loss": 0.6506786346435547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8422285318374634, "epoch": 6.84, "learning_rate": 1.7568329106790646e-05, "loss": 0.649, "step": 8089, "task_loss": 0.40894627571105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2739871144294739, "epoch": 6.84, "learning_rate": 1.7563632948248336e-05, "loss": 0.5807, "step": 8090, "task_loss": 0.22858518362045288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7770801782608032, "epoch": 6.84, "learning_rate": 1.7558936789706022e-05, "loss": 0.6699, "step": 8091, "task_loss": 0.3601386249065399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0702570676803589, "epoch": 6.84, "learning_rate": 1.755424063116371e-05, "loss": 0.6123, "step": 8092, "task_loss": 0.9559454321861267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.087671160697937, "epoch": 6.84, "learning_rate": 1.7549544472621395e-05, "loss": 0.841, "step": 8093, "task_loss": 1.1169887781143188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3109525442123413, "epoch": 6.84, "learning_rate": 1.7544848314079085e-05, "loss": 0.4811, "step": 8094, "task_loss": 0.15527035295963287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8392810821533203, "epoch": 6.84, "learning_rate": 1.754015215553677e-05, "loss": 0.6976, "step": 8095, "task_loss": 1.2955424785614014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8192136287689209, "epoch": 6.84, "learning_rate": 1.753545599699446e-05, "loss": 0.6758, "step": 8096, "task_loss": 0.8804548382759094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6948935389518738, "epoch": 6.84, "learning_rate": 1.7530759838452147e-05, "loss": 0.6869, "step": 8097, "task_loss": 1.0964992046356201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3261141777038574, "epoch": 6.84, "learning_rate": 1.7526063679909834e-05, "loss": 0.7088, "step": 8098, "task_loss": 0.4267514646053314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5495062470436096, "epoch": 6.85, "learning_rate": 1.752136752136752e-05, "loss": 0.5317, "step": 8099, "task_loss": 0.6666022539138794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6328444480895996, "epoch": 6.85, "learning_rate": 1.751667136282521e-05, "loss": 0.7283, "step": 8100, "task_loss": 0.746303141117096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5790407061576843, "epoch": 6.85, "learning_rate": 1.7511975204282896e-05, "loss": 0.5943, "step": 8101, "task_loss": 1.143196702003479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5486317873001099, "epoch": 6.85, "learning_rate": 1.7507279045740586e-05, "loss": 0.598, "step": 8102, "task_loss": 0.1207600012421608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6027767658233643, "epoch": 6.85, "learning_rate": 1.7502582887198275e-05, "loss": 0.764, "step": 8103, "task_loss": 0.6370188593864441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5035058259963989, "epoch": 6.85, "learning_rate": 1.749788672865596e-05, "loss": 0.6522, "step": 8104, "task_loss": 1.0138121843338013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3964279592037201, "epoch": 6.85, "learning_rate": 1.7493190570113648e-05, "loss": 0.5997, "step": 8105, "task_loss": 1.2202740907669067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6264592409133911, "epoch": 6.85, "learning_rate": 1.7488494411571334e-05, "loss": 0.6165, "step": 8106, "task_loss": 0.8427432775497437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46462804079055786, "epoch": 6.85, "learning_rate": 1.7483798253029024e-05, "loss": 0.4254, "step": 8107, "task_loss": 1.0002202987670898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40111348032951355, "epoch": 6.85, "learning_rate": 1.747910209448671e-05, "loss": 0.6746, "step": 8108, "task_loss": 0.6348126530647278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8744820356369019, "epoch": 6.85, "learning_rate": 1.74744059359444e-05, "loss": 0.8303, "step": 8109, "task_loss": 0.5973114967346191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7663622498512268, "epoch": 6.85, "learning_rate": 1.7469709777402087e-05, "loss": 0.7366, "step": 8110, "task_loss": 1.1841180324554443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.749495267868042, "epoch": 6.86, "learning_rate": 1.7465013618859773e-05, "loss": 0.6697, "step": 8111, "task_loss": 0.7143357992172241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3780057728290558, "epoch": 6.86, "learning_rate": 1.746031746031746e-05, "loss": 0.497, "step": 8112, "task_loss": 0.36710458993911743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6374988555908203, "epoch": 6.86, "learning_rate": 1.745562130177515e-05, "loss": 0.6788, "step": 8113, "task_loss": 1.2055132389068604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6157052516937256, "epoch": 6.86, "learning_rate": 1.7450925143232835e-05, "loss": 0.7606, "step": 8114, "task_loss": 0.31126949191093445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6471995115280151, "epoch": 6.86, "learning_rate": 1.7446228984690525e-05, "loss": 0.6435, "step": 8115, "task_loss": 0.3259549140930176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4850677251815796, "epoch": 6.86, "learning_rate": 1.744153282614821e-05, "loss": 0.4269, "step": 8116, "task_loss": 0.261991024017334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7356256246566772, "epoch": 6.86, "learning_rate": 1.7436836667605898e-05, "loss": 0.6499, "step": 8117, "task_loss": 0.7735035419464111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7398495674133301, "epoch": 6.86, "learning_rate": 1.7432140509063587e-05, "loss": 0.5281, "step": 8118, "task_loss": 0.9267300367355347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7204630374908447, "epoch": 6.86, "learning_rate": 1.7427444350521274e-05, "loss": 0.7132, "step": 8119, "task_loss": 1.2892659902572632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6468266248703003, "epoch": 6.86, "learning_rate": 1.7422748191978963e-05, "loss": 0.7575, "step": 8120, "task_loss": 0.3640287518501282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6192712783813477, "epoch": 6.86, "learning_rate": 1.741805203343665e-05, "loss": 0.7154, "step": 8121, "task_loss": 1.0953335762023926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9509196281433105, "epoch": 6.87, "learning_rate": 1.741335587489434e-05, "loss": 0.8418, "step": 8122, "task_loss": 0.9723795652389526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8146560788154602, "epoch": 6.87, "learning_rate": 1.7408659716352026e-05, "loss": 0.6911, "step": 8123, "task_loss": 0.8127323389053345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5421720147132874, "epoch": 6.87, "learning_rate": 1.7403963557809712e-05, "loss": 0.7836, "step": 8124, "task_loss": 1.6105235815048218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7024751901626587, "epoch": 6.87, "learning_rate": 1.73992673992674e-05, "loss": 0.7758, "step": 8125, "task_loss": 1.0667471885681152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43269914388656616, "epoch": 6.87, "learning_rate": 1.7394571240725088e-05, "loss": 0.7017, "step": 8126, "task_loss": 0.9287610054016113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7263235449790955, "epoch": 6.87, "learning_rate": 1.7389875082182775e-05, "loss": 0.7463, "step": 8127, "task_loss": 1.3094127178192139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49897927045822144, "epoch": 6.87, "learning_rate": 1.7385178923640464e-05, "loss": 0.5418, "step": 8128, "task_loss": 0.16694821417331696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2519818544387817, "epoch": 6.87, "learning_rate": 1.738048276509815e-05, "loss": 0.9589, "step": 8129, "task_loss": 0.7563779354095459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9648216366767883, "epoch": 6.87, "learning_rate": 1.7375786606555837e-05, "loss": 0.6941, "step": 8130, "task_loss": 0.2845327854156494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5791811943054199, "epoch": 6.87, "learning_rate": 1.7371090448013523e-05, "loss": 0.6114, "step": 8131, "task_loss": 0.8264464139938354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26399320363998413, "epoch": 6.87, "learning_rate": 1.7366394289471213e-05, "loss": 0.6065, "step": 8132, "task_loss": 0.9794647693634033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5326921343803406, "epoch": 6.87, "learning_rate": 1.7361698130928903e-05, "loss": 0.6058, "step": 8133, "task_loss": 0.23952823877334595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2830787897109985, "epoch": 6.88, "learning_rate": 1.735700197238659e-05, "loss": 0.784, "step": 8134, "task_loss": 1.3932160139083862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7290500402450562, "epoch": 6.88, "learning_rate": 1.735230581384428e-05, "loss": 0.8211, "step": 8135, "task_loss": 1.2411859035491943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40725135803222656, "epoch": 6.88, "learning_rate": 1.7347609655301962e-05, "loss": 0.4629, "step": 8136, "task_loss": 0.18049219250679016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6755633354187012, "epoch": 6.88, "learning_rate": 1.734291349675965e-05, "loss": 0.6895, "step": 8137, "task_loss": 1.6161190271377563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5553630590438843, "epoch": 6.88, "learning_rate": 1.7338217338217338e-05, "loss": 0.672, "step": 8138, "task_loss": 1.0494130849838257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.530738353729248, "epoch": 6.88, "learning_rate": 1.7333521179675028e-05, "loss": 0.8199, "step": 8139, "task_loss": 0.4736010432243347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4942363500595093, "epoch": 6.88, "learning_rate": 1.7328825021132714e-05, "loss": 0.5225, "step": 8140, "task_loss": 0.1593048870563507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4909782409667969, "epoch": 6.88, "learning_rate": 1.7324128862590404e-05, "loss": 0.5078, "step": 8141, "task_loss": 0.15476001799106598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.336698442697525, "epoch": 6.88, "learning_rate": 1.731943270404809e-05, "loss": 0.6221, "step": 8142, "task_loss": 0.33957767486572266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7681174278259277, "epoch": 6.88, "learning_rate": 1.7314736545505776e-05, "loss": 0.6749, "step": 8143, "task_loss": 0.6715535521507263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6022106409072876, "epoch": 6.88, "learning_rate": 1.7310040386963463e-05, "loss": 0.5496, "step": 8144, "task_loss": 0.31297504901885986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6505686044692993, "epoch": 6.88, "learning_rate": 1.7305344228421152e-05, "loss": 0.5022, "step": 8145, "task_loss": 0.8075226545333862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6702520847320557, "epoch": 6.89, "learning_rate": 1.730064806987884e-05, "loss": 0.9787, "step": 8146, "task_loss": 0.49944767355918884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45899665355682373, "epoch": 6.89, "learning_rate": 1.729595191133653e-05, "loss": 0.8214, "step": 8147, "task_loss": 0.35769209265708923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41930079460144043, "epoch": 6.89, "learning_rate": 1.7291255752794215e-05, "loss": 0.5729, "step": 8148, "task_loss": 0.7245598435401917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37191301584243774, "epoch": 6.89, "learning_rate": 1.72865595942519e-05, "loss": 0.5525, "step": 8149, "task_loss": 0.2642701268196106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6715090274810791, "epoch": 6.89, "learning_rate": 1.728186343570959e-05, "loss": 0.7179, "step": 8150, "task_loss": 0.346011757850647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9891663193702698, "epoch": 6.89, "learning_rate": 1.7277167277167277e-05, "loss": 0.71, "step": 8151, "task_loss": 1.3203452825546265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4659188389778137, "epoch": 6.89, "learning_rate": 1.7272471118624967e-05, "loss": 0.5935, "step": 8152, "task_loss": 0.27557840943336487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9953070878982544, "epoch": 6.89, "learning_rate": 1.7267774960082653e-05, "loss": 0.7179, "step": 8153, "task_loss": 1.3047233819961548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1776931285858154, "epoch": 6.89, "learning_rate": 1.7263078801540343e-05, "loss": 0.7732, "step": 8154, "task_loss": 1.0270800590515137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4829953908920288, "epoch": 6.89, "learning_rate": 1.725838264299803e-05, "loss": 0.5419, "step": 8155, "task_loss": 0.3708091676235199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7376128435134888, "epoch": 6.89, "learning_rate": 1.7253686484455716e-05, "loss": 0.7159, "step": 8156, "task_loss": 1.1183112859725952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5287071466445923, "epoch": 6.89, "learning_rate": 1.7248990325913402e-05, "loss": 0.8456, "step": 8157, "task_loss": 0.39483872056007385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8433346748352051, "epoch": 6.9, "learning_rate": 1.7244294167371092e-05, "loss": 0.6208, "step": 8158, "task_loss": 1.2327898740768433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8287795186042786, "epoch": 6.9, "learning_rate": 1.7239598008828778e-05, "loss": 0.8432, "step": 8159, "task_loss": 0.694749116897583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5392560958862305, "epoch": 6.9, "learning_rate": 1.7234901850286468e-05, "loss": 0.6772, "step": 8160, "task_loss": 0.46893996000289917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6661648750305176, "epoch": 6.9, "learning_rate": 1.7230205691744154e-05, "loss": 0.7727, "step": 8161, "task_loss": 0.36928653717041016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49396270513534546, "epoch": 6.9, "learning_rate": 1.722550953320184e-05, "loss": 0.6454, "step": 8162, "task_loss": 0.36113879084587097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43735483288764954, "epoch": 6.9, "learning_rate": 1.722081337465953e-05, "loss": 0.7907, "step": 8163, "task_loss": 0.40130090713500977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46012550592422485, "epoch": 6.9, "learning_rate": 1.7216117216117217e-05, "loss": 0.6402, "step": 8164, "task_loss": 0.8655164241790771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29519638419151306, "epoch": 6.9, "learning_rate": 1.7211421057574906e-05, "loss": 0.6281, "step": 8165, "task_loss": 0.2730313837528229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48464101552963257, "epoch": 6.9, "learning_rate": 1.7206724899032593e-05, "loss": 0.6442, "step": 8166, "task_loss": 0.6337299942970276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5642828941345215, "epoch": 6.9, "learning_rate": 1.7202028740490282e-05, "loss": 0.6603, "step": 8167, "task_loss": 0.4339548647403717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44991999864578247, "epoch": 6.9, "learning_rate": 1.7197332581947965e-05, "loss": 0.6372, "step": 8168, "task_loss": 0.4483765959739685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6341661214828491, "epoch": 6.9, "learning_rate": 1.7192636423405655e-05, "loss": 0.557, "step": 8169, "task_loss": 0.6210566759109497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6840400099754333, "epoch": 6.91, "learning_rate": 1.718794026486334e-05, "loss": 0.6892, "step": 8170, "task_loss": 0.7249706387519836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6005399227142334, "epoch": 6.91, "learning_rate": 1.718324410632103e-05, "loss": 0.7296, "step": 8171, "task_loss": 1.117541790008545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.856312096118927, "epoch": 6.91, "learning_rate": 1.7178547947778718e-05, "loss": 0.7723, "step": 8172, "task_loss": 1.2925097942352295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6298449039459229, "epoch": 6.91, "learning_rate": 1.7173851789236407e-05, "loss": 0.5732, "step": 8173, "task_loss": 1.666282057762146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2480878829956055, "epoch": 6.91, "learning_rate": 1.7169155630694094e-05, "loss": 0.8967, "step": 8174, "task_loss": 1.4410938024520874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1359854936599731, "epoch": 6.91, "learning_rate": 1.716445947215178e-05, "loss": 0.6551, "step": 8175, "task_loss": 0.7011289596557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5866140723228455, "epoch": 6.91, "learning_rate": 1.7159763313609466e-05, "loss": 0.5985, "step": 8176, "task_loss": 0.18906769156455994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5747870802879333, "epoch": 6.91, "learning_rate": 1.7155067155067156e-05, "loss": 0.6783, "step": 8177, "task_loss": 2.2330524921417236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36335793137550354, "epoch": 6.91, "learning_rate": 1.7150370996524842e-05, "loss": 0.6207, "step": 8178, "task_loss": 0.2969452142715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4351349174976349, "epoch": 6.91, "learning_rate": 1.7145674837982532e-05, "loss": 0.5377, "step": 8179, "task_loss": 0.9535549283027649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5565478205680847, "epoch": 6.91, "learning_rate": 1.714097867944022e-05, "loss": 0.5506, "step": 8180, "task_loss": 0.5149583220481873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8702678084373474, "epoch": 6.91, "learning_rate": 1.7136282520897905e-05, "loss": 0.7351, "step": 8181, "task_loss": 1.2248574495315552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5534543395042419, "epoch": 6.92, "learning_rate": 1.7131586362355594e-05, "loss": 0.6479, "step": 8182, "task_loss": 0.93807053565979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9886804819107056, "epoch": 6.92, "learning_rate": 1.712689020381328e-05, "loss": 0.8569, "step": 8183, "task_loss": 1.8001503944396973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7090643048286438, "epoch": 6.92, "learning_rate": 1.712219404527097e-05, "loss": 0.7635, "step": 8184, "task_loss": 1.3601869344711304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6994132399559021, "epoch": 6.92, "learning_rate": 1.7117497886728657e-05, "loss": 0.6324, "step": 8185, "task_loss": 0.5749340653419495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6865054368972778, "epoch": 6.92, "learning_rate": 1.7112801728186347e-05, "loss": 0.6116, "step": 8186, "task_loss": 1.2295653820037842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35728245973587036, "epoch": 6.92, "learning_rate": 1.710810556964403e-05, "loss": 0.5767, "step": 8187, "task_loss": 0.174751877784729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.584659218788147, "epoch": 6.92, "learning_rate": 1.710340941110172e-05, "loss": 0.5192, "step": 8188, "task_loss": 0.39362096786499023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6577816009521484, "epoch": 6.92, "learning_rate": 1.7098713252559406e-05, "loss": 0.6385, "step": 8189, "task_loss": 0.7979651689529419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7161285281181335, "epoch": 6.92, "learning_rate": 1.7094017094017095e-05, "loss": 0.7058, "step": 8190, "task_loss": 2.0173275470733643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3295747935771942, "epoch": 6.92, "learning_rate": 1.7089320935474782e-05, "loss": 0.4392, "step": 8191, "task_loss": 0.6205171942710876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9042061567306519, "epoch": 6.92, "learning_rate": 1.708462477693247e-05, "loss": 0.7797, "step": 8192, "task_loss": 0.8623096942901611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5689709186553955, "epoch": 6.93, "learning_rate": 1.7079928618390158e-05, "loss": 0.6375, "step": 8193, "task_loss": 0.5217279195785522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7620458602905273, "epoch": 6.93, "learning_rate": 1.7075232459847844e-05, "loss": 0.5613, "step": 8194, "task_loss": 0.2193094789981842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.685788631439209, "epoch": 6.93, "learning_rate": 1.7070536301305534e-05, "loss": 0.5936, "step": 8195, "task_loss": 0.306204229593277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9678001403808594, "epoch": 6.93, "learning_rate": 1.706584014276322e-05, "loss": 0.7534, "step": 8196, "task_loss": 0.8999829292297363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0171301364898682, "epoch": 6.93, "learning_rate": 1.706114398422091e-05, "loss": 0.7315, "step": 8197, "task_loss": 0.8240467309951782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46369001269340515, "epoch": 6.93, "learning_rate": 1.7056447825678596e-05, "loss": 0.7203, "step": 8198, "task_loss": 0.4040193259716034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43291768431663513, "epoch": 6.93, "learning_rate": 1.7051751667136283e-05, "loss": 0.6035, "step": 8199, "task_loss": 0.7873587012290955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48611515760421753, "epoch": 6.93, "learning_rate": 1.704705550859397e-05, "loss": 0.6886, "step": 8200, "task_loss": 0.45923715829849243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37436777353286743, "epoch": 6.93, "learning_rate": 1.704235935005166e-05, "loss": 0.7652, "step": 8201, "task_loss": 0.6491209268569946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7548084259033203, "epoch": 6.93, "learning_rate": 1.7037663191509345e-05, "loss": 0.654, "step": 8202, "task_loss": 1.0524457693099976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9856065511703491, "epoch": 6.93, "learning_rate": 1.7032967032967035e-05, "loss": 0.6518, "step": 8203, "task_loss": 1.195932149887085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38021010160446167, "epoch": 6.93, "learning_rate": 1.702827087442472e-05, "loss": 0.5873, "step": 8204, "task_loss": 0.3310790956020355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5503592491149902, "epoch": 6.94, "learning_rate": 1.702357471588241e-05, "loss": 0.6392, "step": 8205, "task_loss": 0.15499332547187805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4915392994880676, "epoch": 6.94, "learning_rate": 1.7018878557340097e-05, "loss": 0.6435, "step": 8206, "task_loss": 1.4554834365844727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4707005023956299, "epoch": 6.94, "learning_rate": 1.7014182398797783e-05, "loss": 0.6452, "step": 8207, "task_loss": 0.25967416167259216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2815644145011902, "epoch": 6.94, "learning_rate": 1.700948624025547e-05, "loss": 0.6401, "step": 8208, "task_loss": 0.3524269461631775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8865159749984741, "epoch": 6.94, "learning_rate": 1.700479008171316e-05, "loss": 0.7273, "step": 8209, "task_loss": 1.0572530031204224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5900793075561523, "epoch": 6.94, "learning_rate": 1.700009392317085e-05, "loss": 0.7518, "step": 8210, "task_loss": 1.2498698234558105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9880379438400269, "epoch": 6.94, "learning_rate": 1.6995397764628536e-05, "loss": 0.7313, "step": 8211, "task_loss": 1.4080100059509277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3912244439125061, "epoch": 6.94, "learning_rate": 1.6990701606086222e-05, "loss": 0.7371, "step": 8212, "task_loss": 0.5150129199028015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3897213339805603, "epoch": 6.94, "learning_rate": 1.6986005447543908e-05, "loss": 0.6728, "step": 8213, "task_loss": 0.5229703187942505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6967080235481262, "epoch": 6.94, "learning_rate": 1.6981309289001598e-05, "loss": 0.5801, "step": 8214, "task_loss": 0.3586879372596741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1390438079833984, "epoch": 6.94, "learning_rate": 1.6976613130459284e-05, "loss": 0.817, "step": 8215, "task_loss": 0.8937646150588989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5396355390548706, "epoch": 6.94, "learning_rate": 1.6971916971916974e-05, "loss": 0.6614, "step": 8216, "task_loss": 0.4732743203639984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4702181816101074, "epoch": 6.95, "learning_rate": 1.696722081337466e-05, "loss": 0.5625, "step": 8217, "task_loss": 0.6878730654716492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4849562346935272, "epoch": 6.95, "learning_rate": 1.696252465483235e-05, "loss": 0.6269, "step": 8218, "task_loss": 1.147453784942627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48837849497795105, "epoch": 6.95, "learning_rate": 1.6957828496290033e-05, "loss": 0.5057, "step": 8219, "task_loss": 0.4561350643634796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9259575009346008, "epoch": 6.95, "learning_rate": 1.6953132337747723e-05, "loss": 1.0053, "step": 8220, "task_loss": 0.7316066026687622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9442729949951172, "epoch": 6.95, "learning_rate": 1.694843617920541e-05, "loss": 0.6158, "step": 8221, "task_loss": 1.2424458265304565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5828534364700317, "epoch": 6.95, "learning_rate": 1.69437400206631e-05, "loss": 0.6572, "step": 8222, "task_loss": 0.9033986330032349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6111237406730652, "epoch": 6.95, "learning_rate": 1.6939043862120785e-05, "loss": 0.6247, "step": 8223, "task_loss": 0.7417819499969482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47766372561454773, "epoch": 6.95, "learning_rate": 1.6934347703578475e-05, "loss": 0.5437, "step": 8224, "task_loss": 0.17822301387786865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6561529636383057, "epoch": 6.95, "learning_rate": 1.692965154503616e-05, "loss": 0.7443, "step": 8225, "task_loss": 0.21686317026615143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5029873251914978, "epoch": 6.95, "learning_rate": 1.6924955386493848e-05, "loss": 0.7953, "step": 8226, "task_loss": 0.29849186539649963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.510461151599884, "epoch": 6.95, "learning_rate": 1.6920259227951537e-05, "loss": 0.6378, "step": 8227, "task_loss": 0.7395954132080078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9259421825408936, "epoch": 6.95, "learning_rate": 1.6915563069409224e-05, "loss": 0.6886, "step": 8228, "task_loss": 0.738472580909729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7928704619407654, "epoch": 6.96, "learning_rate": 1.6910866910866913e-05, "loss": 0.7543, "step": 8229, "task_loss": 0.6231814026832581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8208722472190857, "epoch": 6.96, "learning_rate": 1.69061707523246e-05, "loss": 0.9154, "step": 8230, "task_loss": 0.6024072766304016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8753741383552551, "epoch": 6.96, "learning_rate": 1.6901474593782286e-05, "loss": 0.6322, "step": 8231, "task_loss": 0.8451560139656067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9416050314903259, "epoch": 6.96, "learning_rate": 1.6896778435239972e-05, "loss": 0.7275, "step": 8232, "task_loss": 1.3195574283599854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5051660537719727, "epoch": 6.96, "learning_rate": 1.6892082276697662e-05, "loss": 0.5219, "step": 8233, "task_loss": 0.16035617887973785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6442462205886841, "epoch": 6.96, "learning_rate": 1.688738611815535e-05, "loss": 0.6213, "step": 8234, "task_loss": 0.7473482489585876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9269366264343262, "epoch": 6.96, "learning_rate": 1.6882689959613038e-05, "loss": 0.8788, "step": 8235, "task_loss": 1.082012414932251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5751038789749146, "epoch": 6.96, "learning_rate": 1.6877993801070725e-05, "loss": 0.5119, "step": 8236, "task_loss": 0.506101667881012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6053421497344971, "epoch": 6.96, "learning_rate": 1.6873297642528414e-05, "loss": 0.566, "step": 8237, "task_loss": 1.2200074195861816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7351679801940918, "epoch": 6.96, "learning_rate": 1.68686014839861e-05, "loss": 0.604, "step": 8238, "task_loss": 0.43562018871307373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4738854467868805, "epoch": 6.96, "learning_rate": 1.6863905325443787e-05, "loss": 0.6104, "step": 8239, "task_loss": 0.3432239294052124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6267512440681458, "epoch": 6.96, "learning_rate": 1.6859209166901477e-05, "loss": 0.7023, "step": 8240, "task_loss": 0.1497192233800888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8400366306304932, "epoch": 6.97, "learning_rate": 1.6854513008359163e-05, "loss": 0.8444, "step": 8241, "task_loss": 0.26139914989471436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4619557559490204, "epoch": 6.97, "learning_rate": 1.6849816849816853e-05, "loss": 0.5575, "step": 8242, "task_loss": 0.8044344782829285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5178228616714478, "epoch": 6.97, "learning_rate": 1.684512069127454e-05, "loss": 0.7531, "step": 8243, "task_loss": 0.1319168359041214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0832070112228394, "epoch": 6.97, "learning_rate": 1.6840424532732225e-05, "loss": 0.7168, "step": 8244, "task_loss": 1.2461035251617432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9387443661689758, "epoch": 6.97, "learning_rate": 1.6835728374189912e-05, "loss": 0.7217, "step": 8245, "task_loss": 1.4785085916519165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8217938542366028, "epoch": 6.97, "learning_rate": 1.68310322156476e-05, "loss": 0.7034, "step": 8246, "task_loss": 0.832227885723114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.787305474281311, "epoch": 6.97, "learning_rate": 1.6826336057105288e-05, "loss": 0.583, "step": 8247, "task_loss": 0.5000820755958557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3067796230316162, "epoch": 6.97, "learning_rate": 1.6821639898562978e-05, "loss": 0.8775, "step": 8248, "task_loss": 1.24086594581604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6303636431694031, "epoch": 6.97, "learning_rate": 1.6816943740020664e-05, "loss": 0.6281, "step": 8249, "task_loss": 0.9810742139816284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1235582828521729, "epoch": 6.97, "learning_rate": 1.6812247581478354e-05, "loss": 0.9617, "step": 8250, "task_loss": 1.430856466293335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45788443088531494, "epoch": 6.97, "learning_rate": 1.6807551422936037e-05, "loss": 0.5631, "step": 8251, "task_loss": 0.3990844786167145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1790835857391357, "epoch": 6.97, "learning_rate": 1.6802855264393726e-05, "loss": 0.8239, "step": 8252, "task_loss": 1.1584075689315796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8287557363510132, "epoch": 6.98, "learning_rate": 1.6798159105851413e-05, "loss": 0.5925, "step": 8253, "task_loss": 0.13233715295791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4965192675590515, "epoch": 6.98, "learning_rate": 1.6793462947309102e-05, "loss": 0.5517, "step": 8254, "task_loss": 1.0531268119812012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4549487233161926, "epoch": 6.98, "learning_rate": 1.678876678876679e-05, "loss": 0.6127, "step": 8255, "task_loss": 0.7941056489944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8727363348007202, "epoch": 6.98, "learning_rate": 1.678407063022448e-05, "loss": 0.697, "step": 8256, "task_loss": 0.9525073766708374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2736925780773163, "epoch": 6.98, "learning_rate": 1.6779374471682165e-05, "loss": 0.5838, "step": 8257, "task_loss": 0.08259619027376175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2182104587554932, "epoch": 6.98, "learning_rate": 1.677467831313985e-05, "loss": 0.8512, "step": 8258, "task_loss": 1.0291330814361572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.513450562953949, "epoch": 6.98, "learning_rate": 1.676998215459754e-05, "loss": 0.9092, "step": 8259, "task_loss": 2.128511428833008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7320982217788696, "epoch": 6.98, "learning_rate": 1.6765285996055227e-05, "loss": 1.0564, "step": 8260, "task_loss": 0.7184269428253174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6437056064605713, "epoch": 6.98, "learning_rate": 1.6760589837512917e-05, "loss": 0.7364, "step": 8261, "task_loss": 0.8189189434051514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9547452330589294, "epoch": 6.98, "learning_rate": 1.6755893678970603e-05, "loss": 0.8484, "step": 8262, "task_loss": 1.5398790836334229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4158810079097748, "epoch": 6.98, "learning_rate": 1.675119752042829e-05, "loss": 0.6011, "step": 8263, "task_loss": 0.42850834131240845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7490745782852173, "epoch": 6.99, "learning_rate": 1.6746501361885976e-05, "loss": 0.844, "step": 8264, "task_loss": 0.7823286652565002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5495190024375916, "epoch": 6.99, "learning_rate": 1.6741805203343666e-05, "loss": 0.8596, "step": 8265, "task_loss": 0.6750435829162598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5783536434173584, "epoch": 6.99, "learning_rate": 1.6737109044801352e-05, "loss": 0.5325, "step": 8266, "task_loss": 0.3463832139968872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.961163341999054, "epoch": 6.99, "learning_rate": 1.6732412886259042e-05, "loss": 0.6873, "step": 8267, "task_loss": 0.8193154335021973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6475191116333008, "epoch": 6.99, "learning_rate": 1.6727716727716728e-05, "loss": 0.5923, "step": 8268, "task_loss": 0.8980156183242798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4674425423145294, "epoch": 6.99, "learning_rate": 1.6723020569174418e-05, "loss": 0.4727, "step": 8269, "task_loss": 0.16705647110939026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6220239400863647, "epoch": 6.99, "learning_rate": 1.67183244106321e-05, "loss": 0.6745, "step": 8270, "task_loss": 0.8287215232849121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7591457366943359, "epoch": 6.99, "learning_rate": 1.671362825208979e-05, "loss": 0.6412, "step": 8271, "task_loss": 0.7139304280281067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9707326889038086, "epoch": 6.99, "learning_rate": 1.670893209354748e-05, "loss": 0.8232, "step": 8272, "task_loss": 1.0399680137634277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34321683645248413, "epoch": 6.99, "learning_rate": 1.6704235935005167e-05, "loss": 0.5511, "step": 8273, "task_loss": 0.5626502633094788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37892258167266846, "epoch": 6.99, "learning_rate": 1.6699539776462856e-05, "loss": 0.5587, "step": 8274, "task_loss": 0.42387497425079346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7037137746810913, "epoch": 6.99, "learning_rate": 1.6694843617920543e-05, "loss": 0.6822, "step": 8275, "task_loss": 0.8615065813064575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6590986251831055, "epoch": 7.0, "learning_rate": 1.669014745937823e-05, "loss": 0.6899, "step": 8276, "task_loss": 0.9620116949081421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3890301585197449, "epoch": 7.0, "learning_rate": 1.6685451300835915e-05, "loss": 0.6269, "step": 8277, "task_loss": 1.3044803142547607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.032088279724121, "epoch": 7.0, "learning_rate": 1.6680755142293605e-05, "loss": 0.6877, "step": 8278, "task_loss": 0.8368021845817566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5454612374305725, "epoch": 7.0, "learning_rate": 1.667605898375129e-05, "loss": 0.6811, "step": 8279, "task_loss": 0.5820830464363098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5220029354095459, "epoch": 7.0, "learning_rate": 1.667136282520898e-05, "loss": 0.596, "step": 8280, "task_loss": 0.3645034730434418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44504159688949585, "epoch": 7.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.7528, "step": 8281, "task_loss": 0.18664802610874176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6502344608306885, "epoch": 7.0, "learning_rate": 1.6661970508124354e-05, "loss": 1.0985, "step": 8282, "task_loss": 1.4238941669464111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6077643036842346, "epoch": 7.0, "learning_rate": 1.665727434958204e-05, "loss": 0.5948, "step": 8283, "task_loss": 0.5179555416107178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5039222836494446, "epoch": 7.0, "learning_rate": 1.665257819103973e-05, "loss": 0.6672, "step": 8284, "task_loss": 0.7050424814224243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7797756195068359, "epoch": 7.0, "learning_rate": 1.6647882032497416e-05, "loss": 0.9649, "step": 8285, "task_loss": 0.7383098602294922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48952555656433105, "epoch": 7.0, "learning_rate": 1.6643185873955106e-05, "loss": 0.5676, "step": 8286, "task_loss": 1.2228586673736572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6753863096237183, "epoch": 7.01, "learning_rate": 1.6638489715412796e-05, "loss": 0.5601, "step": 8287, "task_loss": 1.160298466682434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4866541922092438, "epoch": 7.01, "learning_rate": 1.6633793556870482e-05, "loss": 0.4779, "step": 8288, "task_loss": 0.25374406576156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4359271228313446, "epoch": 7.01, "learning_rate": 1.662909739832817e-05, "loss": 0.4763, "step": 8289, "task_loss": 0.568440854549408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.608273983001709, "epoch": 7.01, "learning_rate": 1.6624401239785855e-05, "loss": 0.6746, "step": 8290, "task_loss": 0.6689833998680115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9456614851951599, "epoch": 7.01, "learning_rate": 1.6619705081243544e-05, "loss": 0.7818, "step": 8291, "task_loss": 0.9862039089202881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6178123354911804, "epoch": 7.01, "learning_rate": 1.661500892270123e-05, "loss": 0.5275, "step": 8292, "task_loss": 0.6570758819580078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2820464074611664, "epoch": 7.01, "learning_rate": 1.661031276415892e-05, "loss": 0.4432, "step": 8293, "task_loss": 0.2131740003824234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6254476308822632, "epoch": 7.01, "learning_rate": 1.6605616605616607e-05, "loss": 0.6522, "step": 8294, "task_loss": 1.458012342453003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5883931517601013, "epoch": 7.01, "learning_rate": 1.6600920447074293e-05, "loss": 0.589, "step": 8295, "task_loss": 0.25919225811958313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5788252949714661, "epoch": 7.01, "learning_rate": 1.659622428853198e-05, "loss": 0.6665, "step": 8296, "task_loss": 0.08751725405454636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7472037076950073, "epoch": 7.01, "learning_rate": 1.659152812998967e-05, "loss": 0.5891, "step": 8297, "task_loss": 0.9050278067588806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4040442407131195, "epoch": 7.01, "learning_rate": 1.6586831971447356e-05, "loss": 0.4438, "step": 8298, "task_loss": 0.21141250431537628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6897804737091064, "epoch": 7.02, "learning_rate": 1.6582135812905045e-05, "loss": 0.7843, "step": 8299, "task_loss": 0.7828312516212463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.03957998752594, "epoch": 7.02, "learning_rate": 1.657743965436273e-05, "loss": 0.7282, "step": 8300, "task_loss": 1.0141671895980835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5126866698265076, "epoch": 7.02, "learning_rate": 1.657274349582042e-05, "loss": 0.7917, "step": 8301, "task_loss": 0.8189151287078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.16989266872406, "epoch": 7.02, "learning_rate": 1.6568047337278108e-05, "loss": 0.6627, "step": 8302, "task_loss": 0.6866945028305054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.758991003036499, "epoch": 7.02, "learning_rate": 1.6563351178735794e-05, "loss": 0.6336, "step": 8303, "task_loss": 0.8152180314064026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3982871472835541, "epoch": 7.02, "learning_rate": 1.6558655020193484e-05, "loss": 0.4771, "step": 8304, "task_loss": 0.6050978302955627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9330400228500366, "epoch": 7.02, "learning_rate": 1.655395886165117e-05, "loss": 0.6708, "step": 8305, "task_loss": 1.2489842176437378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7837103605270386, "epoch": 7.02, "learning_rate": 1.654926270310886e-05, "loss": 0.5789, "step": 8306, "task_loss": 0.760562539100647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0025584697723389, "epoch": 7.02, "learning_rate": 1.6544566544566546e-05, "loss": 0.9145, "step": 8307, "task_loss": 0.9524984955787659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6002421379089355, "epoch": 7.02, "learning_rate": 1.6539870386024233e-05, "loss": 0.7849, "step": 8308, "task_loss": 0.577609658241272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9806716442108154, "epoch": 7.02, "learning_rate": 1.653517422748192e-05, "loss": 0.6275, "step": 8309, "task_loss": 0.7225004434585571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.488905668258667, "epoch": 7.02, "learning_rate": 1.653047806893961e-05, "loss": 0.6574, "step": 8310, "task_loss": 0.6594966650009155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28141048550605774, "epoch": 7.03, "learning_rate": 1.6525781910397295e-05, "loss": 0.4312, "step": 8311, "task_loss": 0.5169705748558044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42205363512039185, "epoch": 7.03, "learning_rate": 1.6521085751854985e-05, "loss": 0.5661, "step": 8312, "task_loss": 0.7096627354621887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.803270697593689, "epoch": 7.03, "learning_rate": 1.651638959331267e-05, "loss": 0.7432, "step": 8313, "task_loss": 0.6377303004264832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34023749828338623, "epoch": 7.03, "learning_rate": 1.6511693434770357e-05, "loss": 0.6384, "step": 8314, "task_loss": 1.1047941446304321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1187307834625244, "epoch": 7.03, "learning_rate": 1.6506997276228044e-05, "loss": 0.7667, "step": 8315, "task_loss": 2.336920976638794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7806664109230042, "epoch": 7.03, "learning_rate": 1.6502301117685733e-05, "loss": 0.6608, "step": 8316, "task_loss": 0.7728365659713745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5386933088302612, "epoch": 7.03, "learning_rate": 1.6497604959143423e-05, "loss": 0.717, "step": 8317, "task_loss": 0.5022688508033752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.19413620233535767, "epoch": 7.03, "learning_rate": 1.649290880060111e-05, "loss": 0.586, "step": 8318, "task_loss": 0.05803046375513077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6542040705680847, "epoch": 7.03, "learning_rate": 1.64882126420588e-05, "loss": 0.7331, "step": 8319, "task_loss": 0.3497411906719208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6511110067367554, "epoch": 7.03, "learning_rate": 1.6483516483516486e-05, "loss": 0.5453, "step": 8320, "task_loss": 0.4386492371559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1489766836166382, "epoch": 7.03, "learning_rate": 1.6478820324974172e-05, "loss": 0.622, "step": 8321, "task_loss": 1.105839729309082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6330865621566772, "epoch": 7.03, "learning_rate": 1.6474124166431858e-05, "loss": 0.5686, "step": 8322, "task_loss": 1.0909205675125122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6308818459510803, "epoch": 7.04, "learning_rate": 1.6469428007889548e-05, "loss": 0.6751, "step": 8323, "task_loss": 1.7492039203643799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7940860986709595, "epoch": 7.04, "learning_rate": 1.6464731849347234e-05, "loss": 0.5873, "step": 8324, "task_loss": 0.6653805375099182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4419930875301361, "epoch": 7.04, "learning_rate": 1.6460035690804924e-05, "loss": 0.6568, "step": 8325, "task_loss": 0.1515699177980423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2992112934589386, "epoch": 7.04, "learning_rate": 1.645533953226261e-05, "loss": 0.4134, "step": 8326, "task_loss": 0.5442045331001282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7728848457336426, "epoch": 7.04, "learning_rate": 1.6450643373720297e-05, "loss": 0.6668, "step": 8327, "task_loss": 1.492931604385376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8606414794921875, "epoch": 7.04, "learning_rate": 1.6445947215177983e-05, "loss": 0.7217, "step": 8328, "task_loss": 1.680236577987671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48145419359207153, "epoch": 7.04, "learning_rate": 1.6441251056635673e-05, "loss": 0.4247, "step": 8329, "task_loss": 0.514780580997467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8725835084915161, "epoch": 7.04, "learning_rate": 1.643655489809336e-05, "loss": 0.7069, "step": 8330, "task_loss": 1.2081546783447266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8390852212905884, "epoch": 7.04, "learning_rate": 1.643185873955105e-05, "loss": 0.6449, "step": 8331, "task_loss": 0.4814020097255707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4998575747013092, "epoch": 7.04, "learning_rate": 1.6427162581008735e-05, "loss": 0.5315, "step": 8332, "task_loss": 1.022141933441162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5226305723190308, "epoch": 7.04, "learning_rate": 1.642246642246642e-05, "loss": 0.5848, "step": 8333, "task_loss": 0.1537438929080963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7004600763320923, "epoch": 7.04, "learning_rate": 1.641777026392411e-05, "loss": 0.602, "step": 8334, "task_loss": 0.7608137726783752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49842336773872375, "epoch": 7.05, "learning_rate": 1.6413074105381798e-05, "loss": 0.8288, "step": 8335, "task_loss": 0.6164093017578125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5962473750114441, "epoch": 7.05, "learning_rate": 1.6408377946839487e-05, "loss": 0.7081, "step": 8336, "task_loss": 1.7799010276794434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6637512445449829, "epoch": 7.05, "learning_rate": 1.6403681788297174e-05, "loss": 0.5832, "step": 8337, "task_loss": 0.4455660879611969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4863669276237488, "epoch": 7.05, "learning_rate": 1.6398985629754863e-05, "loss": 0.5432, "step": 8338, "task_loss": 0.43332353234291077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.624737560749054, "epoch": 7.05, "learning_rate": 1.639428947121255e-05, "loss": 0.5958, "step": 8339, "task_loss": 1.6449649333953857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37318456172943115, "epoch": 7.05, "learning_rate": 1.6389593312670236e-05, "loss": 0.564, "step": 8340, "task_loss": 0.39244526624679565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7674558162689209, "epoch": 7.05, "learning_rate": 1.6384897154127922e-05, "loss": 0.6242, "step": 8341, "task_loss": 0.9275439381599426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8619047999382019, "epoch": 7.05, "learning_rate": 1.6380200995585612e-05, "loss": 0.8164, "step": 8342, "task_loss": 0.7344034314155579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8801370859146118, "epoch": 7.05, "learning_rate": 1.63755048370433e-05, "loss": 0.6082, "step": 8343, "task_loss": 0.7028793096542358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5054726600646973, "epoch": 7.05, "learning_rate": 1.6370808678500988e-05, "loss": 0.5752, "step": 8344, "task_loss": 0.9318929314613342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6418774127960205, "epoch": 7.05, "learning_rate": 1.6366112519958675e-05, "loss": 0.6554, "step": 8345, "task_loss": 0.8826058506965637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36445021629333496, "epoch": 7.05, "learning_rate": 1.636141636141636e-05, "loss": 0.6502, "step": 8346, "task_loss": 1.0245580673217773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.950870156288147, "epoch": 7.06, "learning_rate": 1.6356720202874047e-05, "loss": 0.6669, "step": 8347, "task_loss": 1.0791813135147095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34343111515045166, "epoch": 7.06, "learning_rate": 1.6352024044331737e-05, "loss": 0.6423, "step": 8348, "task_loss": 0.2882532477378845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0432546138763428, "epoch": 7.06, "learning_rate": 1.6347327885789427e-05, "loss": 0.7643, "step": 8349, "task_loss": 1.1702295541763306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7410948872566223, "epoch": 7.06, "learning_rate": 1.6342631727247113e-05, "loss": 0.5405, "step": 8350, "task_loss": 0.8201940059661865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6432256698608398, "epoch": 7.06, "learning_rate": 1.6337935568704803e-05, "loss": 0.5691, "step": 8351, "task_loss": 0.7317485809326172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7984938025474548, "epoch": 7.06, "learning_rate": 1.633323941016249e-05, "loss": 0.833, "step": 8352, "task_loss": 0.761434018611908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47025352716445923, "epoch": 7.06, "learning_rate": 1.6328543251620175e-05, "loss": 0.4565, "step": 8353, "task_loss": 0.3577621579170227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8376067280769348, "epoch": 7.06, "learning_rate": 1.6323847093077862e-05, "loss": 0.6276, "step": 8354, "task_loss": 0.7452422380447388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.889875590801239, "epoch": 7.06, "learning_rate": 1.631915093453555e-05, "loss": 0.6433, "step": 8355, "task_loss": 0.39320045709609985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.598115861415863, "epoch": 7.06, "learning_rate": 1.6314454775993238e-05, "loss": 0.6141, "step": 8356, "task_loss": 1.0612915754318237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45560139417648315, "epoch": 7.06, "learning_rate": 1.6309758617450928e-05, "loss": 0.634, "step": 8357, "task_loss": 1.0360360145568848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3783850073814392, "epoch": 7.07, "learning_rate": 1.6305062458908614e-05, "loss": 0.516, "step": 8358, "task_loss": 1.0583206415176392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5894360542297363, "epoch": 7.07, "learning_rate": 1.63003663003663e-05, "loss": 0.6911, "step": 8359, "task_loss": 0.9075256586074829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6422885656356812, "epoch": 7.07, "learning_rate": 1.6295670141823987e-05, "loss": 0.6531, "step": 8360, "task_loss": 0.8880829215049744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5823285579681396, "epoch": 7.07, "learning_rate": 1.6290973983281676e-05, "loss": 0.5175, "step": 8361, "task_loss": 1.2031855583190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5642610192298889, "epoch": 7.07, "learning_rate": 1.6286277824739363e-05, "loss": 0.5971, "step": 8362, "task_loss": 0.22433002293109894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41649356484413147, "epoch": 7.07, "learning_rate": 1.6281581666197052e-05, "loss": 0.5518, "step": 8363, "task_loss": 0.15053656697273254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6419951319694519, "epoch": 7.07, "learning_rate": 1.6276885507654742e-05, "loss": 0.6563, "step": 8364, "task_loss": 0.6875998377799988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3988587558269501, "epoch": 7.07, "learning_rate": 1.6272189349112425e-05, "loss": 0.441, "step": 8365, "task_loss": 0.9046746492385864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4316309690475464, "epoch": 7.07, "learning_rate": 1.6267493190570115e-05, "loss": 0.4352, "step": 8366, "task_loss": 0.11531129479408264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44444364309310913, "epoch": 7.07, "learning_rate": 1.62627970320278e-05, "loss": 0.5511, "step": 8367, "task_loss": 0.13115324079990387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45004701614379883, "epoch": 7.07, "learning_rate": 1.625810087348549e-05, "loss": 0.555, "step": 8368, "task_loss": 0.9146607518196106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.667554497718811, "epoch": 7.07, "learning_rate": 1.6253404714943177e-05, "loss": 0.644, "step": 8369, "task_loss": 0.5812681317329407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7638207077980042, "epoch": 7.08, "learning_rate": 1.6248708556400867e-05, "loss": 0.742, "step": 8370, "task_loss": 0.8764868378639221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9974825978279114, "epoch": 7.08, "learning_rate": 1.6244012397858553e-05, "loss": 0.6716, "step": 8371, "task_loss": 1.5166677236557007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.555535078048706, "epoch": 7.08, "learning_rate": 1.623931623931624e-05, "loss": 0.7365, "step": 8372, "task_loss": 1.8638160228729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6889716982841492, "epoch": 7.08, "learning_rate": 1.6234620080773926e-05, "loss": 0.7414, "step": 8373, "task_loss": 1.0975788831710815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.61005699634552, "epoch": 7.08, "learning_rate": 1.6229923922231616e-05, "loss": 0.6531, "step": 8374, "task_loss": 0.3742385506629944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6286646127700806, "epoch": 7.08, "learning_rate": 1.6225227763689302e-05, "loss": 0.5924, "step": 8375, "task_loss": 0.14717131853103638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4505949020385742, "epoch": 7.08, "learning_rate": 1.6220531605146992e-05, "loss": 0.6391, "step": 8376, "task_loss": 1.0570091009140015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6255457401275635, "epoch": 7.08, "learning_rate": 1.6215835446604678e-05, "loss": 0.6166, "step": 8377, "task_loss": 0.6403512954711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.517704963684082, "epoch": 7.08, "learning_rate": 1.6211139288062364e-05, "loss": 0.5966, "step": 8378, "task_loss": 0.8998610377311707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6307387948036194, "epoch": 7.08, "learning_rate": 1.6206443129520054e-05, "loss": 0.6942, "step": 8379, "task_loss": 1.0513458251953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7460479140281677, "epoch": 7.08, "learning_rate": 1.620174697097774e-05, "loss": 0.6654, "step": 8380, "task_loss": 2.298304557800293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5134311318397522, "epoch": 7.08, "learning_rate": 1.619705081243543e-05, "loss": 0.5199, "step": 8381, "task_loss": 0.9421859383583069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.276017427444458, "epoch": 7.09, "learning_rate": 1.6192354653893117e-05, "loss": 0.763, "step": 8382, "task_loss": 1.5677356719970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43080559372901917, "epoch": 7.09, "learning_rate": 1.6187658495350806e-05, "loss": 0.7062, "step": 8383, "task_loss": 1.3728878498077393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5517777800559998, "epoch": 7.09, "learning_rate": 1.6182962336808493e-05, "loss": 0.4624, "step": 8384, "task_loss": 1.3030973672866821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48243027925491333, "epoch": 7.09, "learning_rate": 1.617826617826618e-05, "loss": 0.5574, "step": 8385, "task_loss": 0.7228100895881653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4299613833427429, "epoch": 7.09, "learning_rate": 1.6173570019723865e-05, "loss": 0.6274, "step": 8386, "task_loss": 0.4998064637184143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3080787658691406, "epoch": 7.09, "learning_rate": 1.6168873861181555e-05, "loss": 0.8089, "step": 8387, "task_loss": 0.8028414249420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4941737651824951, "epoch": 7.09, "learning_rate": 1.616417770263924e-05, "loss": 0.5611, "step": 8388, "task_loss": 0.42564764618873596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6533660888671875, "epoch": 7.09, "learning_rate": 1.615948154409693e-05, "loss": 0.4782, "step": 8389, "task_loss": 1.5079699754714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0695430040359497, "epoch": 7.09, "learning_rate": 1.6154785385554617e-05, "loss": 0.7886, "step": 8390, "task_loss": 0.8381595015525818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9541364908218384, "epoch": 7.09, "learning_rate": 1.6150089227012304e-05, "loss": 0.6617, "step": 8391, "task_loss": 1.5382941961288452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9020836353302002, "epoch": 7.09, "learning_rate": 1.614539306846999e-05, "loss": 0.7346, "step": 8392, "task_loss": 1.0225298404693604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7424600720405579, "epoch": 7.09, "learning_rate": 1.614069690992768e-05, "loss": 0.6769, "step": 8393, "task_loss": 0.5522496104240417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45210638642311096, "epoch": 7.1, "learning_rate": 1.613600075138537e-05, "loss": 0.6861, "step": 8394, "task_loss": 0.9985661506652832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4175128936767578, "epoch": 7.1, "learning_rate": 1.6131304592843056e-05, "loss": 0.51, "step": 8395, "task_loss": 0.4365578591823578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5737032890319824, "epoch": 7.1, "learning_rate": 1.6126608434300746e-05, "loss": 0.6708, "step": 8396, "task_loss": 0.8408185243606567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5210843086242676, "epoch": 7.1, "learning_rate": 1.612191227575843e-05, "loss": 0.69, "step": 8397, "task_loss": 0.8765642642974854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6968522667884827, "epoch": 7.1, "learning_rate": 1.6117216117216118e-05, "loss": 0.6789, "step": 8398, "task_loss": 1.5161992311477661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6142183542251587, "epoch": 7.1, "learning_rate": 1.6112519958673805e-05, "loss": 0.7218, "step": 8399, "task_loss": 1.0332763195037842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8581916689872742, "epoch": 7.1, "learning_rate": 1.6107823800131494e-05, "loss": 0.8579, "step": 8400, "task_loss": 0.56678307056427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8120884895324707, "epoch": 7.1, "learning_rate": 1.610312764158918e-05, "loss": 0.6799, "step": 8401, "task_loss": 0.5686408877372742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5764094591140747, "epoch": 7.1, "learning_rate": 1.609843148304687e-05, "loss": 0.5682, "step": 8402, "task_loss": 0.9655101895332336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4378734827041626, "epoch": 7.1, "learning_rate": 1.6093735324504557e-05, "loss": 0.6974, "step": 8403, "task_loss": 0.8503146171569824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.901645302772522, "epoch": 7.1, "learning_rate": 1.6089039165962243e-05, "loss": 0.7783, "step": 8404, "task_loss": 0.36907869577407837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6843645572662354, "epoch": 7.1, "learning_rate": 1.608434300741993e-05, "loss": 0.5849, "step": 8405, "task_loss": 0.7904179692268372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4477115869522095, "epoch": 7.11, "learning_rate": 1.607964684887762e-05, "loss": 0.773, "step": 8406, "task_loss": 0.9586353302001953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4346562623977661, "epoch": 7.11, "learning_rate": 1.6074950690335306e-05, "loss": 0.5811, "step": 8407, "task_loss": 0.9710264801979065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8149930834770203, "epoch": 7.11, "learning_rate": 1.6070254531792995e-05, "loss": 0.7569, "step": 8408, "task_loss": 1.8348298072814941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32314324378967285, "epoch": 7.11, "learning_rate": 1.606555837325068e-05, "loss": 0.4806, "step": 8409, "task_loss": 1.0773603916168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39715972542762756, "epoch": 7.11, "learning_rate": 1.6060862214708368e-05, "loss": 0.4104, "step": 8410, "task_loss": 0.4070744812488556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.638953685760498, "epoch": 7.11, "learning_rate": 1.6056166056166058e-05, "loss": 0.6178, "step": 8411, "task_loss": 0.3749602735042572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6178500652313232, "epoch": 7.11, "learning_rate": 1.6051469897623744e-05, "loss": 0.5599, "step": 8412, "task_loss": 0.45613622665405273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45844873785972595, "epoch": 7.11, "learning_rate": 1.6046773739081434e-05, "loss": 0.5365, "step": 8413, "task_loss": 0.9543063044548035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1525447368621826, "epoch": 7.11, "learning_rate": 1.604207758053912e-05, "loss": 0.8429, "step": 8414, "task_loss": 0.720680832862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5575574636459351, "epoch": 7.11, "learning_rate": 1.603738142199681e-05, "loss": 0.5818, "step": 8415, "task_loss": 0.3662923276424408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3649632930755615, "epoch": 7.11, "learning_rate": 1.6032685263454493e-05, "loss": 0.4245, "step": 8416, "task_loss": 0.7708789706230164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.551699161529541, "epoch": 7.11, "learning_rate": 1.6027989104912182e-05, "loss": 0.5757, "step": 8417, "task_loss": 0.8763657808303833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5807058811187744, "epoch": 7.12, "learning_rate": 1.602329294636987e-05, "loss": 0.6527, "step": 8418, "task_loss": 0.5113093256950378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5879057049751282, "epoch": 7.12, "learning_rate": 1.601859678782756e-05, "loss": 0.6162, "step": 8419, "task_loss": 0.6728661060333252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4376450181007385, "epoch": 7.12, "learning_rate": 1.6013900629285245e-05, "loss": 0.5467, "step": 8420, "task_loss": 0.5431109666824341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8336297273635864, "epoch": 7.12, "learning_rate": 1.6009204470742935e-05, "loss": 0.7097, "step": 8421, "task_loss": 1.0712894201278687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9350837469100952, "epoch": 7.12, "learning_rate": 1.600450831220062e-05, "loss": 0.5893, "step": 8422, "task_loss": 0.8131906986236572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6537396907806396, "epoch": 7.12, "learning_rate": 1.5999812153658307e-05, "loss": 0.8134, "step": 8423, "task_loss": 0.7635588645935059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44841647148132324, "epoch": 7.12, "learning_rate": 1.5995115995115994e-05, "loss": 0.6695, "step": 8424, "task_loss": 0.17292314767837524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3683030605316162, "epoch": 7.12, "learning_rate": 1.5990419836573683e-05, "loss": 0.6197, "step": 8425, "task_loss": 1.1269019842147827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38631582260131836, "epoch": 7.12, "learning_rate": 1.5985723678031373e-05, "loss": 0.5273, "step": 8426, "task_loss": 0.2779548168182373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4844942092895508, "epoch": 7.12, "learning_rate": 1.598102751948906e-05, "loss": 0.5004, "step": 8427, "task_loss": 0.5667625665664673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47933429479599, "epoch": 7.12, "learning_rate": 1.5976331360946746e-05, "loss": 0.7028, "step": 8428, "task_loss": 0.6208500266075134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7707109451293945, "epoch": 7.13, "learning_rate": 1.5971635202404432e-05, "loss": 0.8471, "step": 8429, "task_loss": 1.3797520399093628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49682173132896423, "epoch": 7.13, "learning_rate": 1.5966939043862122e-05, "loss": 0.5778, "step": 8430, "task_loss": 0.8560353517532349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6250436902046204, "epoch": 7.13, "learning_rate": 1.5962242885319808e-05, "loss": 0.6688, "step": 8431, "task_loss": 0.2670261859893799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7405346035957336, "epoch": 7.13, "learning_rate": 1.5957546726777498e-05, "loss": 0.5486, "step": 8432, "task_loss": 0.9232471585273743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5849032998085022, "epoch": 7.13, "learning_rate": 1.5952850568235184e-05, "loss": 0.644, "step": 8433, "task_loss": 0.5956871509552002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.667679488658905, "epoch": 7.13, "learning_rate": 1.5948154409692874e-05, "loss": 0.5129, "step": 8434, "task_loss": 1.123559594154358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30871522426605225, "epoch": 7.13, "learning_rate": 1.594345825115056e-05, "loss": 0.6514, "step": 8435, "task_loss": 0.4407200813293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6756477355957031, "epoch": 7.13, "learning_rate": 1.5938762092608247e-05, "loss": 0.5745, "step": 8436, "task_loss": 0.3420315980911255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8109480142593384, "epoch": 7.13, "learning_rate": 1.5934065934065933e-05, "loss": 0.7919, "step": 8437, "task_loss": 0.720430850982666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43559080362319946, "epoch": 7.13, "learning_rate": 1.5929369775523623e-05, "loss": 0.7154, "step": 8438, "task_loss": 0.9615699648857117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5168213844299316, "epoch": 7.13, "learning_rate": 1.592467361698131e-05, "loss": 0.4745, "step": 8439, "task_loss": 0.36289578676223755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5628691911697388, "epoch": 7.13, "learning_rate": 1.5919977458439e-05, "loss": 0.549, "step": 8440, "task_loss": 0.8913969993591309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5128779411315918, "epoch": 7.14, "learning_rate": 1.5915281299896685e-05, "loss": 0.8135, "step": 8441, "task_loss": 0.888882040977478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6194814443588257, "epoch": 7.14, "learning_rate": 1.591058514135437e-05, "loss": 0.6581, "step": 8442, "task_loss": 1.3749377727508545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6620607972145081, "epoch": 7.14, "learning_rate": 1.590588898281206e-05, "loss": 0.6849, "step": 8443, "task_loss": 0.9426718950271606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3187057077884674, "epoch": 7.14, "learning_rate": 1.5901192824269748e-05, "loss": 0.5964, "step": 8444, "task_loss": 0.21625299751758575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6795735359191895, "epoch": 7.14, "learning_rate": 1.5896496665727437e-05, "loss": 0.6711, "step": 8445, "task_loss": 1.5367047786712646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49813857674598694, "epoch": 7.14, "learning_rate": 1.5891800507185124e-05, "loss": 0.7015, "step": 8446, "task_loss": 0.6801850199699402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.002047061920166, "epoch": 7.14, "learning_rate": 1.5887104348642813e-05, "loss": 0.588, "step": 8447, "task_loss": 0.9695881009101868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5033389329910278, "epoch": 7.14, "learning_rate": 1.5882408190100496e-05, "loss": 0.5892, "step": 8448, "task_loss": 1.3173280954360962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3348947763442993, "epoch": 7.14, "learning_rate": 1.5877712031558186e-05, "loss": 0.4687, "step": 8449, "task_loss": 0.1404290795326233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5654045343399048, "epoch": 7.14, "learning_rate": 1.5873015873015872e-05, "loss": 0.5573, "step": 8450, "task_loss": 1.2316793203353882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6739129424095154, "epoch": 7.14, "learning_rate": 1.5868319714473562e-05, "loss": 0.5629, "step": 8451, "task_loss": 0.5001402497291565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.607145369052887, "epoch": 7.14, "learning_rate": 1.586362355593125e-05, "loss": 0.5282, "step": 8452, "task_loss": 0.4207254946231842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6113876700401306, "epoch": 7.15, "learning_rate": 1.5858927397388938e-05, "loss": 0.4617, "step": 8453, "task_loss": 0.18343223631381989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3727980852127075, "epoch": 7.15, "learning_rate": 1.5854231238846624e-05, "loss": 0.5148, "step": 8454, "task_loss": 0.752208948135376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7500145435333252, "epoch": 7.15, "learning_rate": 1.584953508030431e-05, "loss": 0.6993, "step": 8455, "task_loss": 0.6915287375450134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3280147314071655, "epoch": 7.15, "learning_rate": 1.5844838921762e-05, "loss": 0.4768, "step": 8456, "task_loss": 0.35219767689704895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5274670124053955, "epoch": 7.15, "learning_rate": 1.5840142763219687e-05, "loss": 0.773, "step": 8457, "task_loss": 0.9464274048805237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5324335694313049, "epoch": 7.15, "learning_rate": 1.5835446604677377e-05, "loss": 0.6167, "step": 8458, "task_loss": 0.21182024478912354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.692650318145752, "epoch": 7.15, "learning_rate": 1.5830750446135063e-05, "loss": 0.6156, "step": 8459, "task_loss": 0.3627750277519226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5327752232551575, "epoch": 7.15, "learning_rate": 1.582605428759275e-05, "loss": 0.789, "step": 8460, "task_loss": 0.19448968768119812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3733312487602234, "epoch": 7.15, "learning_rate": 1.5821358129050436e-05, "loss": 0.5137, "step": 8461, "task_loss": 0.6345183253288269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.230295181274414, "epoch": 7.15, "learning_rate": 1.5816661970508125e-05, "loss": 0.7319, "step": 8462, "task_loss": 0.63008052110672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.446319341659546, "epoch": 7.15, "learning_rate": 1.581196581196581e-05, "loss": 0.8756, "step": 8463, "task_loss": 1.1854422092437744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9375352263450623, "epoch": 7.15, "learning_rate": 1.58072696534235e-05, "loss": 0.5716, "step": 8464, "task_loss": 0.8416638374328613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27032041549682617, "epoch": 7.16, "learning_rate": 1.5802573494881188e-05, "loss": 0.644, "step": 8465, "task_loss": 0.7652890086174011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36343270540237427, "epoch": 7.16, "learning_rate": 1.5797877336338877e-05, "loss": 0.5476, "step": 8466, "task_loss": 0.30666080117225647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2761046290397644, "epoch": 7.16, "learning_rate": 1.5793181177796564e-05, "loss": 0.4282, "step": 8467, "task_loss": 0.5205836296081543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4645184278488159, "epoch": 7.16, "learning_rate": 1.578848501925425e-05, "loss": 0.5271, "step": 8468, "task_loss": 0.2342080920934677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0183274745941162, "epoch": 7.16, "learning_rate": 1.5783788860711936e-05, "loss": 0.594, "step": 8469, "task_loss": 0.9820103645324707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47965627908706665, "epoch": 7.16, "learning_rate": 1.5779092702169626e-05, "loss": 0.6296, "step": 8470, "task_loss": 0.24095477163791656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9061952233314514, "epoch": 7.16, "learning_rate": 1.5774396543627313e-05, "loss": 0.6819, "step": 8471, "task_loss": 0.39344093203544617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5165402889251709, "epoch": 7.16, "learning_rate": 1.5769700385085002e-05, "loss": 0.5262, "step": 8472, "task_loss": 0.6604724526405334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5275766849517822, "epoch": 7.16, "learning_rate": 1.576500422654269e-05, "loss": 0.4711, "step": 8473, "task_loss": 0.33674946427345276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.754457414150238, "epoch": 7.16, "learning_rate": 1.5760308068000375e-05, "loss": 0.6934, "step": 8474, "task_loss": 1.274428129196167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5970386266708374, "epoch": 7.16, "learning_rate": 1.5755611909458065e-05, "loss": 0.6498, "step": 8475, "task_loss": 0.9834849834442139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5218552350997925, "epoch": 7.16, "learning_rate": 1.575091575091575e-05, "loss": 0.6901, "step": 8476, "task_loss": 0.5797240734100342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8544020056724548, "epoch": 7.17, "learning_rate": 1.574621959237344e-05, "loss": 0.7983, "step": 8477, "task_loss": 1.4951014518737793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7605336904525757, "epoch": 7.17, "learning_rate": 1.5741523433831127e-05, "loss": 0.8409, "step": 8478, "task_loss": 1.218414545059204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4607318341732025, "epoch": 7.17, "learning_rate": 1.5736827275288817e-05, "loss": 0.6012, "step": 8479, "task_loss": 0.8790180087089539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.785438060760498, "epoch": 7.17, "learning_rate": 1.57321311167465e-05, "loss": 0.702, "step": 8480, "task_loss": 0.5534020662307739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5119589567184448, "epoch": 7.17, "learning_rate": 1.572743495820419e-05, "loss": 0.5638, "step": 8481, "task_loss": 0.682572066783905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9186363816261292, "epoch": 7.17, "learning_rate": 1.5722738799661876e-05, "loss": 0.7277, "step": 8482, "task_loss": 1.4431217908859253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7374269962310791, "epoch": 7.17, "learning_rate": 1.5718042641119566e-05, "loss": 0.7225, "step": 8483, "task_loss": 2.058072805404663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7108901739120483, "epoch": 7.17, "learning_rate": 1.5713346482577252e-05, "loss": 0.6751, "step": 8484, "task_loss": 0.20003481209278107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5700558423995972, "epoch": 7.17, "learning_rate": 1.570865032403494e-05, "loss": 0.6978, "step": 8485, "task_loss": 0.6742021441459656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7278664708137512, "epoch": 7.17, "learning_rate": 1.5703954165492628e-05, "loss": 0.6446, "step": 8486, "task_loss": 0.7140081524848938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5009146928787231, "epoch": 7.17, "learning_rate": 1.5699258006950314e-05, "loss": 0.5863, "step": 8487, "task_loss": 0.6376875042915344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6331689357757568, "epoch": 7.17, "learning_rate": 1.5694561848408004e-05, "loss": 0.7396, "step": 8488, "task_loss": 0.6386411786079407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5660057067871094, "epoch": 7.18, "learning_rate": 1.568986568986569e-05, "loss": 0.6395, "step": 8489, "task_loss": 0.46821269392967224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6053882837295532, "epoch": 7.18, "learning_rate": 1.568516953132338e-05, "loss": 0.633, "step": 8490, "task_loss": 0.4715985655784607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.848970890045166, "epoch": 7.18, "learning_rate": 1.5680473372781066e-05, "loss": 0.6529, "step": 8491, "task_loss": 1.1405982971191406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39791154861450195, "epoch": 7.18, "learning_rate": 1.5675777214238753e-05, "loss": 0.5262, "step": 8492, "task_loss": 0.5203255414962769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0977109670639038, "epoch": 7.18, "learning_rate": 1.567108105569644e-05, "loss": 0.7632, "step": 8493, "task_loss": 0.48243066668510437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.589735746383667, "epoch": 7.18, "learning_rate": 1.566638489715413e-05, "loss": 0.5199, "step": 8494, "task_loss": 0.9721176624298096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49540984630584717, "epoch": 7.18, "learning_rate": 1.5661688738611815e-05, "loss": 0.4993, "step": 8495, "task_loss": 0.31848961114883423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3191092014312744, "epoch": 7.18, "learning_rate": 1.5656992580069505e-05, "loss": 0.3948, "step": 8496, "task_loss": 0.0637546256184578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3113805651664734, "epoch": 7.18, "learning_rate": 1.565229642152719e-05, "loss": 0.7182, "step": 8497, "task_loss": 0.07497648149728775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1362136602401733, "epoch": 7.18, "learning_rate": 1.564760026298488e-05, "loss": 0.7412, "step": 8498, "task_loss": 0.4223826229572296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4564676284790039, "epoch": 7.18, "learning_rate": 1.5642904104442564e-05, "loss": 0.7288, "step": 8499, "task_loss": 1.113735318183899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.649864673614502, "epoch": 7.19, "learning_rate": 1.5638207945900254e-05, "loss": 0.6114, "step": 8500, "task_loss": 0.714470386505127 }, { "epoch": 7.19, "eval_accuracy": 0.8978217821782178, "eval_loss": 0.4080207645893097, "eval_runtime": 224.238, "eval_samples_per_second": 112.604, "eval_steps_per_second": 0.883, "step": 8500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0229908227920532, "epoch": 7.19, "learning_rate": 1.563351178735794e-05, "loss": 0.7541, "step": 8501, "task_loss": 1.678572177886963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6783596873283386, "epoch": 7.19, "learning_rate": 1.562881562881563e-05, "loss": 0.7303, "step": 8502, "task_loss": 0.49134084582328796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31080472469329834, "epoch": 7.19, "learning_rate": 1.562411947027332e-05, "loss": 0.5399, "step": 8503, "task_loss": 0.08022446930408478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7152715921401978, "epoch": 7.19, "learning_rate": 1.5619423311731006e-05, "loss": 0.7057, "step": 8504, "task_loss": 1.436632513999939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8133541941642761, "epoch": 7.19, "learning_rate": 1.5614727153188692e-05, "loss": 0.6429, "step": 8505, "task_loss": 0.8980175852775574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2061183452606201, "epoch": 7.19, "learning_rate": 1.561003099464638e-05, "loss": 0.617, "step": 8506, "task_loss": 0.8913910388946533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9343081116676331, "epoch": 7.19, "learning_rate": 1.5605334836104068e-05, "loss": 0.9262, "step": 8507, "task_loss": 1.109138011932373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6338174343109131, "epoch": 7.19, "learning_rate": 1.5600638677561755e-05, "loss": 0.6176, "step": 8508, "task_loss": 0.08266402035951614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5171859264373779, "epoch": 7.19, "learning_rate": 1.5595942519019444e-05, "loss": 0.536, "step": 8509, "task_loss": 0.49808964133262634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5622588396072388, "epoch": 7.19, "learning_rate": 1.559124636047713e-05, "loss": 0.6467, "step": 8510, "task_loss": 0.8477798104286194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7579199075698853, "epoch": 7.19, "learning_rate": 1.5586550201934817e-05, "loss": 0.8455, "step": 8511, "task_loss": 1.8550200462341309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5777254700660706, "epoch": 7.2, "learning_rate": 1.5581854043392503e-05, "loss": 0.6696, "step": 8512, "task_loss": 1.0440713167190552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4671548306941986, "epoch": 7.2, "learning_rate": 1.5577157884850193e-05, "loss": 0.5772, "step": 8513, "task_loss": 0.4775015115737915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6501567363739014, "epoch": 7.2, "learning_rate": 1.557246172630788e-05, "loss": 0.6229, "step": 8514, "task_loss": 0.8764824867248535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3601847290992737, "epoch": 7.2, "learning_rate": 1.556776556776557e-05, "loss": 0.5145, "step": 8515, "task_loss": 0.6026240587234497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.628470778465271, "epoch": 7.2, "learning_rate": 1.5563069409223255e-05, "loss": 0.6506, "step": 8516, "task_loss": 0.7672469615936279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6323692202568054, "epoch": 7.2, "learning_rate": 1.5558373250680945e-05, "loss": 0.5275, "step": 8517, "task_loss": 0.7017161846160889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5830315947532654, "epoch": 7.2, "learning_rate": 1.555367709213863e-05, "loss": 0.5416, "step": 8518, "task_loss": 0.2081792801618576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29172635078430176, "epoch": 7.2, "learning_rate": 1.5548980933596318e-05, "loss": 0.5314, "step": 8519, "task_loss": 0.3871862590312958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41693639755249023, "epoch": 7.2, "learning_rate": 1.5544284775054008e-05, "loss": 0.5205, "step": 8520, "task_loss": 0.6764258742332458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6178498864173889, "epoch": 7.2, "learning_rate": 1.5539588616511694e-05, "loss": 0.7033, "step": 8521, "task_loss": 0.3892665207386017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5389461517333984, "epoch": 7.2, "learning_rate": 1.5534892457969384e-05, "loss": 0.6837, "step": 8522, "task_loss": 0.6019328236579895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6813217401504517, "epoch": 7.2, "learning_rate": 1.553019629942707e-05, "loss": 0.7196, "step": 8523, "task_loss": 1.0215760469436646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7431827783584595, "epoch": 7.21, "learning_rate": 1.5525500140884756e-05, "loss": 0.572, "step": 8524, "task_loss": 0.829652726650238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4202357530593872, "epoch": 7.21, "learning_rate": 1.5520803982342443e-05, "loss": 0.5403, "step": 8525, "task_loss": 0.10308684408664703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9142799973487854, "epoch": 7.21, "learning_rate": 1.5516107823800132e-05, "loss": 0.5925, "step": 8526, "task_loss": 0.7689386606216431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.750164270401001, "epoch": 7.21, "learning_rate": 1.551141166525782e-05, "loss": 0.7109, "step": 8527, "task_loss": 0.566564679145813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34336546063423157, "epoch": 7.21, "learning_rate": 1.550671550671551e-05, "loss": 0.6525, "step": 8528, "task_loss": 0.3166991174221039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5048114061355591, "epoch": 7.21, "learning_rate": 1.5502019348173195e-05, "loss": 0.6389, "step": 8529, "task_loss": 0.2027001827955246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7511287927627563, "epoch": 7.21, "learning_rate": 1.5497323189630885e-05, "loss": 0.8301, "step": 8530, "task_loss": 1.678208827972412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6882346868515015, "epoch": 7.21, "learning_rate": 1.5492627031088567e-05, "loss": 0.7734, "step": 8531, "task_loss": 0.49128690361976624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6798884272575378, "epoch": 7.21, "learning_rate": 1.5487930872546257e-05, "loss": 0.5541, "step": 8532, "task_loss": 0.812592625617981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.70380038022995, "epoch": 7.21, "learning_rate": 1.5483234714003947e-05, "loss": 0.7628, "step": 8533, "task_loss": 1.4155943393707275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4821310043334961, "epoch": 7.21, "learning_rate": 1.5478538555461633e-05, "loss": 0.5122, "step": 8534, "task_loss": 0.24511407315731049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9768886566162109, "epoch": 7.21, "learning_rate": 1.5473842396919323e-05, "loss": 0.698, "step": 8535, "task_loss": 0.9207010269165039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8698135614395142, "epoch": 7.22, "learning_rate": 1.546914623837701e-05, "loss": 0.7321, "step": 8536, "task_loss": 0.7863072156906128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3215194940567017, "epoch": 7.22, "learning_rate": 1.5464450079834696e-05, "loss": 0.7576, "step": 8537, "task_loss": 0.5825563073158264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6956820487976074, "epoch": 7.22, "learning_rate": 1.5459753921292382e-05, "loss": 0.6221, "step": 8538, "task_loss": 0.6715537309646606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8750174641609192, "epoch": 7.22, "learning_rate": 1.5455057762750072e-05, "loss": 0.803, "step": 8539, "task_loss": 1.9959334135055542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6702337265014648, "epoch": 7.22, "learning_rate": 1.5450361604207758e-05, "loss": 0.5575, "step": 8540, "task_loss": 0.749947726726532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3598354756832123, "epoch": 7.22, "learning_rate": 1.5445665445665448e-05, "loss": 0.5306, "step": 8541, "task_loss": 0.15775103867053986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4799807667732239, "epoch": 7.22, "learning_rate": 1.5440969287123134e-05, "loss": 0.5775, "step": 8542, "task_loss": 0.9306634068489075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8592265844345093, "epoch": 7.22, "learning_rate": 1.543627312858082e-05, "loss": 0.639, "step": 8543, "task_loss": 0.668035626411438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.132265567779541, "epoch": 7.22, "learning_rate": 1.5431576970038507e-05, "loss": 0.7013, "step": 8544, "task_loss": 0.7990521192550659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7347269058227539, "epoch": 7.22, "learning_rate": 1.5426880811496197e-05, "loss": 0.5521, "step": 8545, "task_loss": 0.5428242683410645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7914490699768066, "epoch": 7.22, "learning_rate": 1.5422184652953883e-05, "loss": 0.6828, "step": 8546, "task_loss": 1.2636114358901978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5306199789047241, "epoch": 7.22, "learning_rate": 1.5417488494411573e-05, "loss": 0.5547, "step": 8547, "task_loss": 0.6653650999069214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3252064287662506, "epoch": 7.23, "learning_rate": 1.541279233586926e-05, "loss": 0.5209, "step": 8548, "task_loss": 0.692689061164856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8316682577133179, "epoch": 7.23, "learning_rate": 1.540809617732695e-05, "loss": 0.6511, "step": 8549, "task_loss": 0.7225252985954285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48196732997894287, "epoch": 7.23, "learning_rate": 1.5403400018784635e-05, "loss": 0.577, "step": 8550, "task_loss": 0.518989622592926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5859206914901733, "epoch": 7.23, "learning_rate": 1.539870386024232e-05, "loss": 0.6262, "step": 8551, "task_loss": 0.9314690828323364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6758425831794739, "epoch": 7.23, "learning_rate": 1.539400770170001e-05, "loss": 0.7591, "step": 8552, "task_loss": 0.7835400104522705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6890639662742615, "epoch": 7.23, "learning_rate": 1.5389311543157697e-05, "loss": 0.5316, "step": 8553, "task_loss": 0.595862627029419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4302314221858978, "epoch": 7.23, "learning_rate": 1.5384615384615387e-05, "loss": 0.6177, "step": 8554, "task_loss": 0.30006667971611023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6769499778747559, "epoch": 7.23, "learning_rate": 1.5379919226073074e-05, "loss": 0.6506, "step": 8555, "task_loss": 0.9162425398826599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5038872957229614, "epoch": 7.23, "learning_rate": 1.537522306753076e-05, "loss": 0.6542, "step": 8556, "task_loss": 0.6311954855918884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4482102394104004, "epoch": 7.23, "learning_rate": 1.5370526908988446e-05, "loss": 0.6221, "step": 8557, "task_loss": 1.2924774885177612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5841400623321533, "epoch": 7.23, "learning_rate": 1.5365830750446136e-05, "loss": 0.5306, "step": 8558, "task_loss": 0.549168050289154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4847986698150635, "epoch": 7.23, "learning_rate": 1.5361134591903822e-05, "loss": 0.4936, "step": 8559, "task_loss": 0.9077609777450562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6385740637779236, "epoch": 7.24, "learning_rate": 1.5356438433361512e-05, "loss": 0.558, "step": 8560, "task_loss": 0.33888739347457886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.708033561706543, "epoch": 7.24, "learning_rate": 1.53517422748192e-05, "loss": 0.7442, "step": 8561, "task_loss": 1.1219984292984009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8065342307090759, "epoch": 7.24, "learning_rate": 1.5347046116276888e-05, "loss": 0.7327, "step": 8562, "task_loss": 0.39475739002227783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30062246322631836, "epoch": 7.24, "learning_rate": 1.534234995773457e-05, "loss": 0.446, "step": 8563, "task_loss": 0.06599705666303635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6404033899307251, "epoch": 7.24, "learning_rate": 1.533765379919226e-05, "loss": 0.678, "step": 8564, "task_loss": 1.267673134803772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.569622278213501, "epoch": 7.24, "learning_rate": 1.533295764064995e-05, "loss": 0.7254, "step": 8565, "task_loss": 0.817385733127594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5420805215835571, "epoch": 7.24, "learning_rate": 1.5328261482107637e-05, "loss": 0.6722, "step": 8566, "task_loss": 0.5400745868682861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21894174814224243, "epoch": 7.24, "learning_rate": 1.5323565323565327e-05, "loss": 0.4272, "step": 8567, "task_loss": 0.44108980894088745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5279693007469177, "epoch": 7.24, "learning_rate": 1.5318869165023013e-05, "loss": 0.6266, "step": 8568, "task_loss": 0.391032874584198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6457198858261108, "epoch": 7.24, "learning_rate": 1.53141730064807e-05, "loss": 0.5495, "step": 8569, "task_loss": 0.7002415657043457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7270745038986206, "epoch": 7.24, "learning_rate": 1.5309476847938386e-05, "loss": 0.551, "step": 8570, "task_loss": 0.8373616337776184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6755597591400146, "epoch": 7.24, "learning_rate": 1.5304780689396075e-05, "loss": 0.6478, "step": 8571, "task_loss": 0.8892569541931152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6977425217628479, "epoch": 7.25, "learning_rate": 1.530008453085376e-05, "loss": 0.5549, "step": 8572, "task_loss": 0.21879436075687408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40467095375061035, "epoch": 7.25, "learning_rate": 1.529538837231145e-05, "loss": 0.5875, "step": 8573, "task_loss": 1.617908000946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0152184963226318, "epoch": 7.25, "learning_rate": 1.5290692213769138e-05, "loss": 0.776, "step": 8574, "task_loss": 1.0400949716567993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6703824996948242, "epoch": 7.25, "learning_rate": 1.5285996055226824e-05, "loss": 0.8159, "step": 8575, "task_loss": 1.0030264854431152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7340700626373291, "epoch": 7.25, "learning_rate": 1.528129989668451e-05, "loss": 0.8416, "step": 8576, "task_loss": 0.09539016336202621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5903745889663696, "epoch": 7.25, "learning_rate": 1.52766037381422e-05, "loss": 0.5747, "step": 8577, "task_loss": 0.18387570977210999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8892443776130676, "epoch": 7.25, "learning_rate": 1.5271907579599886e-05, "loss": 0.6558, "step": 8578, "task_loss": 1.5793131589889526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.405023455619812, "epoch": 7.25, "learning_rate": 1.5267211421057576e-05, "loss": 0.6478, "step": 8579, "task_loss": 0.40022894740104675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9647095203399658, "epoch": 7.25, "learning_rate": 1.5262515262515266e-05, "loss": 0.7666, "step": 8580, "task_loss": 1.4580650329589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8222336173057556, "epoch": 7.25, "learning_rate": 1.525781910397295e-05, "loss": 0.6326, "step": 8581, "task_loss": 0.7679829001426697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0832371711730957, "epoch": 7.25, "learning_rate": 1.525312294543064e-05, "loss": 0.7616, "step": 8582, "task_loss": 1.2990286350250244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3941577672958374, "epoch": 7.26, "learning_rate": 1.5248426786888325e-05, "loss": 0.6258, "step": 8583, "task_loss": 0.39703893661499023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0422340631484985, "epoch": 7.26, "learning_rate": 1.5243730628346015e-05, "loss": 0.6581, "step": 8584, "task_loss": 1.264912486076355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2501692473888397, "epoch": 7.26, "learning_rate": 1.5239034469803701e-05, "loss": 0.4957, "step": 8585, "task_loss": 0.18609358370304108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6305222511291504, "epoch": 7.26, "learning_rate": 1.5234338311261389e-05, "loss": 0.664, "step": 8586, "task_loss": 2.015507698059082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48279237747192383, "epoch": 7.26, "learning_rate": 1.5229642152719075e-05, "loss": 0.5814, "step": 8587, "task_loss": 0.09458732604980469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8836498260498047, "epoch": 7.26, "learning_rate": 1.5224945994176765e-05, "loss": 0.5942, "step": 8588, "task_loss": 0.8669910430908203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1565392017364502, "epoch": 7.26, "learning_rate": 1.5220249835634451e-05, "loss": 0.5725, "step": 8589, "task_loss": 1.7224050760269165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9905585050582886, "epoch": 7.26, "learning_rate": 1.521555367709214e-05, "loss": 0.7595, "step": 8590, "task_loss": 0.8524486422538757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6196993589401245, "epoch": 7.26, "learning_rate": 1.5210857518549826e-05, "loss": 0.6303, "step": 8591, "task_loss": 0.8173340559005737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6534645557403564, "epoch": 7.26, "learning_rate": 1.5206161360007516e-05, "loss": 0.7374, "step": 8592, "task_loss": 0.2974660396575928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4403160810470581, "epoch": 7.26, "learning_rate": 1.52014652014652e-05, "loss": 0.4643, "step": 8593, "task_loss": 0.4736440181732178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9879818558692932, "epoch": 7.26, "learning_rate": 1.519676904292289e-05, "loss": 0.7586, "step": 8594, "task_loss": 1.4375395774841309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4447615146636963, "epoch": 7.27, "learning_rate": 1.5192072884380578e-05, "loss": 0.5593, "step": 8595, "task_loss": 0.2786087393760681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6289328932762146, "epoch": 7.27, "learning_rate": 1.5187376725838264e-05, "loss": 0.5575, "step": 8596, "task_loss": 0.6930994391441345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5817004442214966, "epoch": 7.27, "learning_rate": 1.5182680567295954e-05, "loss": 0.7733, "step": 8597, "task_loss": 0.8182135820388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29913434386253357, "epoch": 7.27, "learning_rate": 1.517798440875364e-05, "loss": 0.4468, "step": 8598, "task_loss": 0.08334026485681534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5588014125823975, "epoch": 7.27, "learning_rate": 1.5173288250211328e-05, "loss": 0.6084, "step": 8599, "task_loss": 0.5024418234825134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6327763795852661, "epoch": 7.27, "learning_rate": 1.5168592091669015e-05, "loss": 0.6793, "step": 8600, "task_loss": 1.5190412998199463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4266752600669861, "epoch": 7.27, "learning_rate": 1.5163895933126704e-05, "loss": 0.4433, "step": 8601, "task_loss": 0.2471141517162323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4072551429271698, "epoch": 7.27, "learning_rate": 1.515919977458439e-05, "loss": 0.5701, "step": 8602, "task_loss": 0.19767005741596222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8165024518966675, "epoch": 7.27, "learning_rate": 1.5154503616042079e-05, "loss": 0.6706, "step": 8603, "task_loss": 1.059326171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.685102105140686, "epoch": 7.27, "learning_rate": 1.5149807457499765e-05, "loss": 0.4356, "step": 8604, "task_loss": 0.5180389881134033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9423735737800598, "epoch": 7.27, "learning_rate": 1.5145111298957453e-05, "loss": 0.6565, "step": 8605, "task_loss": 1.528972864151001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7987930178642273, "epoch": 7.27, "learning_rate": 1.514041514041514e-05, "loss": 0.7302, "step": 8606, "task_loss": 1.0073697566986084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37336233258247375, "epoch": 7.28, "learning_rate": 1.513571898187283e-05, "loss": 0.4966, "step": 8607, "task_loss": 0.33100011944770813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6679459810256958, "epoch": 7.28, "learning_rate": 1.5131022823330516e-05, "loss": 0.7619, "step": 8608, "task_loss": 1.2134203910827637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3497578501701355, "epoch": 7.28, "learning_rate": 1.5126326664788204e-05, "loss": 0.478, "step": 8609, "task_loss": 0.39999428391456604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5500091314315796, "epoch": 7.28, "learning_rate": 1.5121630506245893e-05, "loss": 0.6102, "step": 8610, "task_loss": 0.5785890221595764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5182279348373413, "epoch": 7.28, "learning_rate": 1.511693434770358e-05, "loss": 0.5724, "step": 8611, "task_loss": 0.5513257384300232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5468252897262573, "epoch": 7.28, "learning_rate": 1.5112238189161268e-05, "loss": 0.6066, "step": 8612, "task_loss": 0.18795670568943024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5514140129089355, "epoch": 7.28, "learning_rate": 1.5107542030618954e-05, "loss": 0.625, "step": 8613, "task_loss": 0.6654618978500366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5616681575775146, "epoch": 7.28, "learning_rate": 1.5102845872076644e-05, "loss": 0.5883, "step": 8614, "task_loss": 0.8774051070213318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6344401836395264, "epoch": 7.28, "learning_rate": 1.5098149713534328e-05, "loss": 0.6714, "step": 8615, "task_loss": 0.37859269976615906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7578002214431763, "epoch": 7.28, "learning_rate": 1.5093453554992018e-05, "loss": 0.5649, "step": 8616, "task_loss": 1.6900347471237183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6377581357955933, "epoch": 7.28, "learning_rate": 1.5088757396449705e-05, "loss": 0.6364, "step": 8617, "task_loss": 0.4627823531627655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9815986156463623, "epoch": 7.28, "learning_rate": 1.5084061237907393e-05, "loss": 0.7505, "step": 8618, "task_loss": 0.9275291562080383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3267933130264282, "epoch": 7.29, "learning_rate": 1.5079365079365079e-05, "loss": 0.5317, "step": 8619, "task_loss": 0.16929803788661957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4869334101676941, "epoch": 7.29, "learning_rate": 1.5074668920822769e-05, "loss": 0.5469, "step": 8620, "task_loss": 1.6087143421173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39577147364616394, "epoch": 7.29, "learning_rate": 1.5069972762280455e-05, "loss": 0.5271, "step": 8621, "task_loss": 0.1910388469696045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8313823938369751, "epoch": 7.29, "learning_rate": 1.5065276603738143e-05, "loss": 0.6288, "step": 8622, "task_loss": 1.0781948566436768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6215670704841614, "epoch": 7.29, "learning_rate": 1.506058044519583e-05, "loss": 0.7198, "step": 8623, "task_loss": 1.4545233249664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6799923181533813, "epoch": 7.29, "learning_rate": 1.5055884286653519e-05, "loss": 0.6301, "step": 8624, "task_loss": 0.6118623614311218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49168750643730164, "epoch": 7.29, "learning_rate": 1.5051188128111204e-05, "loss": 0.7451, "step": 8625, "task_loss": 0.23292575776576996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42688843607902527, "epoch": 7.29, "learning_rate": 1.5046491969568893e-05, "loss": 0.5609, "step": 8626, "task_loss": 0.5026458501815796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.79167640209198, "epoch": 7.29, "learning_rate": 1.5041795811026581e-05, "loss": 0.7253, "step": 8627, "task_loss": 0.8179715275764465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4300239682197571, "epoch": 7.29, "learning_rate": 1.5037099652484268e-05, "loss": 0.4784, "step": 8628, "task_loss": 0.35154369473457336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5122827291488647, "epoch": 7.29, "learning_rate": 1.5032403493941958e-05, "loss": 0.7008, "step": 8629, "task_loss": 1.315287709236145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1899340152740479, "epoch": 7.29, "learning_rate": 1.5027707335399644e-05, "loss": 0.7441, "step": 8630, "task_loss": 0.508858323097229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5006137490272522, "epoch": 7.3, "learning_rate": 1.5023011176857332e-05, "loss": 0.6431, "step": 8631, "task_loss": 0.5336865782737732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4810575544834137, "epoch": 7.3, "learning_rate": 1.5018315018315018e-05, "loss": 0.461, "step": 8632, "task_loss": 0.14259737730026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5019371509552002, "epoch": 7.3, "learning_rate": 1.5013618859772708e-05, "loss": 0.5323, "step": 8633, "task_loss": 0.49936383962631226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5216016173362732, "epoch": 7.3, "learning_rate": 1.5008922701230394e-05, "loss": 0.6325, "step": 8634, "task_loss": 0.4511985182762146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7140322923660278, "epoch": 7.3, "learning_rate": 1.5004226542688082e-05, "loss": 0.552, "step": 8635, "task_loss": 1.076370358467102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4771939814090729, "epoch": 7.3, "learning_rate": 1.4999530384145769e-05, "loss": 0.5368, "step": 8636, "task_loss": 1.0276433229446411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4359472393989563, "epoch": 7.3, "learning_rate": 1.4994834225603457e-05, "loss": 0.5446, "step": 8637, "task_loss": 0.42423710227012634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6073223352432251, "epoch": 7.3, "learning_rate": 1.4990138067061143e-05, "loss": 0.5139, "step": 8638, "task_loss": 0.51652991771698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5977014303207397, "epoch": 7.3, "learning_rate": 1.4985441908518833e-05, "loss": 0.6499, "step": 8639, "task_loss": 0.158697247505188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8423666954040527, "epoch": 7.3, "learning_rate": 1.4980745749976519e-05, "loss": 0.7989, "step": 8640, "task_loss": 1.0051206350326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8206069469451904, "epoch": 7.3, "learning_rate": 1.4976049591434207e-05, "loss": 0.5376, "step": 8641, "task_loss": 0.35068458318710327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.313032865524292, "epoch": 7.3, "learning_rate": 1.4971353432891897e-05, "loss": 0.345, "step": 8642, "task_loss": 0.4769011437892914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8017807602882385, "epoch": 7.31, "learning_rate": 1.4966657274349583e-05, "loss": 0.4996, "step": 8643, "task_loss": 0.4647453725337982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42284175753593445, "epoch": 7.31, "learning_rate": 1.4961961115807271e-05, "loss": 0.5622, "step": 8644, "task_loss": 0.27022185921669006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5740625858306885, "epoch": 7.31, "learning_rate": 1.4957264957264958e-05, "loss": 0.5385, "step": 8645, "task_loss": 0.6401503086090088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6126382350921631, "epoch": 7.31, "learning_rate": 1.4952568798722647e-05, "loss": 0.6177, "step": 8646, "task_loss": 1.7997357845306396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5135424733161926, "epoch": 7.31, "learning_rate": 1.4947872640180332e-05, "loss": 0.6339, "step": 8647, "task_loss": 1.0015673637390137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3813150227069855, "epoch": 7.31, "learning_rate": 1.4943176481638022e-05, "loss": 0.5709, "step": 8648, "task_loss": 0.3557116687297821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5947679877281189, "epoch": 7.31, "learning_rate": 1.4938480323095708e-05, "loss": 0.5861, "step": 8649, "task_loss": 0.8142498135566711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3515886664390564, "epoch": 7.31, "learning_rate": 1.4933784164553396e-05, "loss": 0.4902, "step": 8650, "task_loss": 0.40423908829689026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.247607707977295, "epoch": 7.31, "learning_rate": 1.4929088006011082e-05, "loss": 0.8626, "step": 8651, "task_loss": 1.311596155166626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5247895121574402, "epoch": 7.31, "learning_rate": 1.4924391847468772e-05, "loss": 0.6796, "step": 8652, "task_loss": 0.5806522965431213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4576222598552704, "epoch": 7.31, "learning_rate": 1.4919695688926458e-05, "loss": 0.5052, "step": 8653, "task_loss": 0.9829514622688293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5405381321907043, "epoch": 7.32, "learning_rate": 1.4914999530384147e-05, "loss": 0.464, "step": 8654, "task_loss": 1.0377745628356934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6810047626495361, "epoch": 7.32, "learning_rate": 1.4910303371841833e-05, "loss": 0.4945, "step": 8655, "task_loss": 0.9840075969696045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8807265162467957, "epoch": 7.32, "learning_rate": 1.4905607213299521e-05, "loss": 0.6878, "step": 8656, "task_loss": 0.8644906878471375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36632418632507324, "epoch": 7.32, "learning_rate": 1.490091105475721e-05, "loss": 0.6809, "step": 8657, "task_loss": 0.06560783088207245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4170352816581726, "epoch": 7.32, "learning_rate": 1.4896214896214897e-05, "loss": 0.571, "step": 8658, "task_loss": 0.9332795143127441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4566081762313843, "epoch": 7.32, "learning_rate": 1.4891518737672585e-05, "loss": 0.7306, "step": 8659, "task_loss": 0.7510563731193542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7675580978393555, "epoch": 7.32, "learning_rate": 1.4886822579130271e-05, "loss": 0.7117, "step": 8660, "task_loss": 0.9686407446861267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4080503582954407, "epoch": 7.32, "learning_rate": 1.4882126420587961e-05, "loss": 0.5511, "step": 8661, "task_loss": 0.8816829323768616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6626456379890442, "epoch": 7.32, "learning_rate": 1.4877430262045647e-05, "loss": 0.5364, "step": 8662, "task_loss": 0.3956361711025238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34869396686553955, "epoch": 7.32, "learning_rate": 1.4872734103503335e-05, "loss": 0.5096, "step": 8663, "task_loss": 0.9601772427558899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47662025690078735, "epoch": 7.32, "learning_rate": 1.4868037944961022e-05, "loss": 0.6479, "step": 8664, "task_loss": 0.6861586570739746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4772341847419739, "epoch": 7.32, "learning_rate": 1.4863341786418711e-05, "loss": 0.6173, "step": 8665, "task_loss": 1.1745058298110962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6260519623756409, "epoch": 7.33, "learning_rate": 1.4858645627876396e-05, "loss": 0.6509, "step": 8666, "task_loss": 0.24829542636871338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3415497839450836, "epoch": 7.33, "learning_rate": 1.4853949469334086e-05, "loss": 0.593, "step": 8667, "task_loss": 0.16184093058109283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.73277747631073, "epoch": 7.33, "learning_rate": 1.4849253310791772e-05, "loss": 0.7258, "step": 8668, "task_loss": 0.920946478843689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3385361433029175, "epoch": 7.33, "learning_rate": 1.484455715224946e-05, "loss": 0.6714, "step": 8669, "task_loss": 0.36291420459747314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6483249068260193, "epoch": 7.33, "learning_rate": 1.4839860993707147e-05, "loss": 0.5391, "step": 8670, "task_loss": 1.2380932569503784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5327538251876831, "epoch": 7.33, "learning_rate": 1.4835164835164836e-05, "loss": 0.7993, "step": 8671, "task_loss": 1.5900673866271973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5868451595306396, "epoch": 7.33, "learning_rate": 1.4830468676622524e-05, "loss": 0.4717, "step": 8672, "task_loss": 0.20939752459526062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6211448311805725, "epoch": 7.33, "learning_rate": 1.482577251808021e-05, "loss": 0.7418, "step": 8673, "task_loss": 0.9238471984863281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9104779958724976, "epoch": 7.33, "learning_rate": 1.48210763595379e-05, "loss": 0.6407, "step": 8674, "task_loss": 0.5057417750358582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36681342124938965, "epoch": 7.33, "learning_rate": 1.4816380200995587e-05, "loss": 0.5043, "step": 8675, "task_loss": 0.8075528144836426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0089768171310425, "epoch": 7.33, "learning_rate": 1.4811684042453275e-05, "loss": 0.745, "step": 8676, "task_loss": 0.8894719481468201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6078029870986938, "epoch": 7.33, "learning_rate": 1.4806987883910961e-05, "loss": 0.6343, "step": 8677, "task_loss": 0.5723538398742676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5990428924560547, "epoch": 7.34, "learning_rate": 1.4802291725368649e-05, "loss": 0.5956, "step": 8678, "task_loss": 0.9383125305175781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5819855332374573, "epoch": 7.34, "learning_rate": 1.4797595566826335e-05, "loss": 0.6519, "step": 8679, "task_loss": 1.261618733406067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5023977160453796, "epoch": 7.34, "learning_rate": 1.4792899408284025e-05, "loss": 0.4973, "step": 8680, "task_loss": 0.46303004026412964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6125172972679138, "epoch": 7.34, "learning_rate": 1.4788203249741712e-05, "loss": 0.6906, "step": 8681, "task_loss": 0.8385752439498901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5032504796981812, "epoch": 7.34, "learning_rate": 1.47835070911994e-05, "loss": 0.5788, "step": 8682, "task_loss": 1.3312922716140747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7167471647262573, "epoch": 7.34, "learning_rate": 1.4778810932657086e-05, "loss": 0.7145, "step": 8683, "task_loss": 0.6066780686378479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5598530173301697, "epoch": 7.34, "learning_rate": 1.4774114774114776e-05, "loss": 0.5831, "step": 8684, "task_loss": 0.7980653047561646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0711133480072021, "epoch": 7.34, "learning_rate": 1.4769418615572462e-05, "loss": 0.8369, "step": 8685, "task_loss": 0.8200892806053162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.419253945350647, "epoch": 7.34, "learning_rate": 1.476472245703015e-05, "loss": 0.533, "step": 8686, "task_loss": 0.5212278962135315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7590743899345398, "epoch": 7.34, "learning_rate": 1.476002629848784e-05, "loss": 0.7974, "step": 8687, "task_loss": 0.6681452989578247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49462586641311646, "epoch": 7.34, "learning_rate": 1.4755330139945524e-05, "loss": 0.688, "step": 8688, "task_loss": 0.4733369052410126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5976427793502808, "epoch": 7.34, "learning_rate": 1.4750633981403214e-05, "loss": 0.6288, "step": 8689, "task_loss": 0.31133833527565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3381720781326294, "epoch": 7.35, "learning_rate": 1.47459378228609e-05, "loss": 0.7799, "step": 8690, "task_loss": 1.0052297115325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5829389095306396, "epoch": 7.35, "learning_rate": 1.4741241664318589e-05, "loss": 0.8418, "step": 8691, "task_loss": 0.9650492668151855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45856502652168274, "epoch": 7.35, "learning_rate": 1.4736545505776275e-05, "loss": 0.6375, "step": 8692, "task_loss": 0.3276419937610626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4250143766403198, "epoch": 7.35, "learning_rate": 1.4731849347233965e-05, "loss": 0.6535, "step": 8693, "task_loss": 0.26523497700691223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7077629566192627, "epoch": 7.35, "learning_rate": 1.4727153188691651e-05, "loss": 0.7196, "step": 8694, "task_loss": 1.2459439039230347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4459773898124695, "epoch": 7.35, "learning_rate": 1.4722457030149339e-05, "loss": 0.6717, "step": 8695, "task_loss": 0.5783897042274475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6658730506896973, "epoch": 7.35, "learning_rate": 1.4717760871607025e-05, "loss": 0.5987, "step": 8696, "task_loss": 0.28184640407562256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6186460256576538, "epoch": 7.35, "learning_rate": 1.4713064713064715e-05, "loss": 0.6097, "step": 8697, "task_loss": 1.0456433296203613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5574439764022827, "epoch": 7.35, "learning_rate": 1.47083685545224e-05, "loss": 0.6361, "step": 8698, "task_loss": 1.108114242553711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7719061374664307, "epoch": 7.35, "learning_rate": 1.470367239598009e-05, "loss": 0.8469, "step": 8699, "task_loss": 1.0705167055130005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6308446526527405, "epoch": 7.35, "learning_rate": 1.4698976237437776e-05, "loss": 0.715, "step": 8700, "task_loss": 0.7752993702888489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6260186433792114, "epoch": 7.35, "learning_rate": 1.4694280078895464e-05, "loss": 0.5566, "step": 8701, "task_loss": 2.60699462890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9909931421279907, "epoch": 7.36, "learning_rate": 1.468958392035315e-05, "loss": 0.715, "step": 8702, "task_loss": 0.9065481424331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5917791128158569, "epoch": 7.36, "learning_rate": 1.468488776181084e-05, "loss": 0.5737, "step": 8703, "task_loss": 0.3627135753631592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5598626136779785, "epoch": 7.36, "learning_rate": 1.4680191603268528e-05, "loss": 0.7094, "step": 8704, "task_loss": 0.9408183693885803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6489422917366028, "epoch": 7.36, "learning_rate": 1.4675495444726214e-05, "loss": 0.5346, "step": 8705, "task_loss": 1.0199872255325317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5811317563056946, "epoch": 7.36, "learning_rate": 1.4670799286183904e-05, "loss": 0.6955, "step": 8706, "task_loss": 0.29827454686164856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7950065732002258, "epoch": 7.36, "learning_rate": 1.466610312764159e-05, "loss": 0.5084, "step": 8707, "task_loss": 0.7887353301048279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.570286750793457, "epoch": 7.36, "learning_rate": 1.4661406969099278e-05, "loss": 0.5578, "step": 8708, "task_loss": 0.2591734230518341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.559973955154419, "epoch": 7.36, "learning_rate": 1.4656710810556965e-05, "loss": 0.6452, "step": 8709, "task_loss": 0.8764822483062744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5020166635513306, "epoch": 7.36, "learning_rate": 1.4652014652014653e-05, "loss": 0.5759, "step": 8710, "task_loss": 0.6233774423599243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6865373849868774, "epoch": 7.36, "learning_rate": 1.4647318493472339e-05, "loss": 0.6561, "step": 8711, "task_loss": 0.3447529971599579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7767570614814758, "epoch": 7.36, "learning_rate": 1.4642622334930029e-05, "loss": 0.7077, "step": 8712, "task_loss": 0.5411501526832581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6306108236312866, "epoch": 7.36, "learning_rate": 1.4637926176387715e-05, "loss": 0.5519, "step": 8713, "task_loss": 0.4061191976070404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41873905062675476, "epoch": 7.37, "learning_rate": 1.4633230017845403e-05, "loss": 0.4923, "step": 8714, "task_loss": 0.33628228306770325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43040400743484497, "epoch": 7.37, "learning_rate": 1.462853385930309e-05, "loss": 0.616, "step": 8715, "task_loss": 0.6627652645111084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7572482824325562, "epoch": 7.37, "learning_rate": 1.462383770076078e-05, "loss": 0.5662, "step": 8716, "task_loss": 0.6063724160194397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8530691862106323, "epoch": 7.37, "learning_rate": 1.4619141542218464e-05, "loss": 0.6704, "step": 8717, "task_loss": 0.32099100947380066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0957618951797485, "epoch": 7.37, "learning_rate": 1.4614445383676154e-05, "loss": 0.7617, "step": 8718, "task_loss": 0.5707115530967712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5918462872505188, "epoch": 7.37, "learning_rate": 1.4609749225133843e-05, "loss": 0.6281, "step": 8719, "task_loss": 0.33330950140953064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8655370473861694, "epoch": 7.37, "learning_rate": 1.4605053066591528e-05, "loss": 0.7793, "step": 8720, "task_loss": 0.31686949729919434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6762462258338928, "epoch": 7.37, "learning_rate": 1.4600356908049218e-05, "loss": 0.6979, "step": 8721, "task_loss": 0.33413392305374146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5264249444007874, "epoch": 7.37, "learning_rate": 1.4595660749506904e-05, "loss": 0.7137, "step": 8722, "task_loss": 0.7018445730209351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7352629899978638, "epoch": 7.37, "learning_rate": 1.4590964590964592e-05, "loss": 0.6323, "step": 8723, "task_loss": 0.5016247630119324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2754204273223877, "epoch": 7.37, "learning_rate": 1.4586268432422278e-05, "loss": 0.5686, "step": 8724, "task_loss": 0.3636101186275482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5058496594429016, "epoch": 7.38, "learning_rate": 1.4581572273879968e-05, "loss": 0.6162, "step": 8725, "task_loss": 0.39631661772727966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3858503997325897, "epoch": 7.38, "learning_rate": 1.4576876115337654e-05, "loss": 0.5682, "step": 8726, "task_loss": 0.601839542388916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7353887557983398, "epoch": 7.38, "learning_rate": 1.4572179956795342e-05, "loss": 0.5533, "step": 8727, "task_loss": 0.27309536933898926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5965457558631897, "epoch": 7.38, "learning_rate": 1.4567483798253029e-05, "loss": 0.6169, "step": 8728, "task_loss": 1.2741625308990479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.758279025554657, "epoch": 7.38, "learning_rate": 1.4562787639710717e-05, "loss": 0.5737, "step": 8729, "task_loss": 0.8645638227462769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33526915311813354, "epoch": 7.38, "learning_rate": 1.4558091481168403e-05, "loss": 0.6662, "step": 8730, "task_loss": 0.6171305179595947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.440474271774292, "epoch": 7.38, "learning_rate": 1.4553395322626093e-05, "loss": 0.6267, "step": 8731, "task_loss": 1.332323431968689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5318173170089722, "epoch": 7.38, "learning_rate": 1.454869916408378e-05, "loss": 0.6056, "step": 8732, "task_loss": 0.7781502604484558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8864731788635254, "epoch": 7.38, "learning_rate": 1.4544003005541467e-05, "loss": 0.6764, "step": 8733, "task_loss": 1.3333582878112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6615207195281982, "epoch": 7.38, "learning_rate": 1.4539306846999157e-05, "loss": 0.5764, "step": 8734, "task_loss": 0.3288297951221466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4870148003101349, "epoch": 7.38, "learning_rate": 1.4534610688456843e-05, "loss": 0.6404, "step": 8735, "task_loss": 1.4009270668029785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33823874592781067, "epoch": 7.38, "learning_rate": 1.4529914529914531e-05, "loss": 0.783, "step": 8736, "task_loss": 0.07921891659498215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9278621673583984, "epoch": 7.39, "learning_rate": 1.4525218371372218e-05, "loss": 0.6822, "step": 8737, "task_loss": 0.7849922776222229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6130959987640381, "epoch": 7.39, "learning_rate": 1.4520522212829907e-05, "loss": 0.5204, "step": 8738, "task_loss": 0.3981631100177765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2862148582935333, "epoch": 7.39, "learning_rate": 1.4515826054287592e-05, "loss": 0.52, "step": 8739, "task_loss": 0.1168980747461319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42131346464157104, "epoch": 7.39, "learning_rate": 1.4511129895745282e-05, "loss": 0.5768, "step": 8740, "task_loss": 0.4664275646209717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45000559091567993, "epoch": 7.39, "learning_rate": 1.4506433737202968e-05, "loss": 0.6852, "step": 8741, "task_loss": 0.3223218023777008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1763768196105957, "epoch": 7.39, "learning_rate": 1.4501737578660656e-05, "loss": 0.6452, "step": 8742, "task_loss": 1.136048436164856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4828723073005676, "epoch": 7.39, "learning_rate": 1.4497041420118343e-05, "loss": 0.6116, "step": 8743, "task_loss": 1.6228692531585693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3417693078517914, "epoch": 7.39, "learning_rate": 1.4492345261576032e-05, "loss": 0.5854, "step": 8744, "task_loss": 0.8045694231987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3826819658279419, "epoch": 7.39, "learning_rate": 1.4487649103033719e-05, "loss": 0.8197, "step": 8745, "task_loss": 1.0323164463043213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.442543625831604, "epoch": 7.39, "learning_rate": 1.4482952944491407e-05, "loss": 0.6931, "step": 8746, "task_loss": 0.44920486211776733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7994245886802673, "epoch": 7.39, "learning_rate": 1.4478256785949093e-05, "loss": 0.5101, "step": 8747, "task_loss": 0.6426541805267334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4189307689666748, "epoch": 7.39, "learning_rate": 1.4473560627406783e-05, "loss": 0.6055, "step": 8748, "task_loss": 1.2174941301345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8121612668037415, "epoch": 7.4, "learning_rate": 1.446886446886447e-05, "loss": 0.7318, "step": 8749, "task_loss": 1.1486973762512207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43366891145706177, "epoch": 7.4, "learning_rate": 1.4464168310322157e-05, "loss": 0.5278, "step": 8750, "task_loss": 0.31724467873573303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6896811723709106, "epoch": 7.4, "learning_rate": 1.4459472151779845e-05, "loss": 0.6021, "step": 8751, "task_loss": 0.2962948679924011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6598345041275024, "epoch": 7.4, "learning_rate": 1.4454775993237531e-05, "loss": 0.6701, "step": 8752, "task_loss": 0.3532833456993103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5777826309204102, "epoch": 7.4, "learning_rate": 1.4450079834695221e-05, "loss": 0.5912, "step": 8753, "task_loss": 0.6421531438827515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9643003940582275, "epoch": 7.4, "learning_rate": 1.4445383676152908e-05, "loss": 0.69, "step": 8754, "task_loss": 0.9302046895027161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5176678895950317, "epoch": 7.4, "learning_rate": 1.4440687517610596e-05, "loss": 0.5936, "step": 8755, "task_loss": 0.4919664263725281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9891716837882996, "epoch": 7.4, "learning_rate": 1.4435991359068282e-05, "loss": 0.7642, "step": 8756, "task_loss": 0.8850867748260498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5061391592025757, "epoch": 7.4, "learning_rate": 1.4431295200525972e-05, "loss": 0.4883, "step": 8757, "task_loss": 0.52427077293396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8851548433303833, "epoch": 7.4, "learning_rate": 1.4426599041983658e-05, "loss": 0.83, "step": 8758, "task_loss": 0.8806981444358826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9499384164810181, "epoch": 7.4, "learning_rate": 1.4421902883441346e-05, "loss": 0.8007, "step": 8759, "task_loss": 2.2785723209381104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7223091125488281, "epoch": 7.4, "learning_rate": 1.4417206724899032e-05, "loss": 0.6261, "step": 8760, "task_loss": 1.8273206949234009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6467049717903137, "epoch": 7.41, "learning_rate": 1.441251056635672e-05, "loss": 0.5885, "step": 8761, "task_loss": 0.8479917049407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7820430994033813, "epoch": 7.41, "learning_rate": 1.4407814407814407e-05, "loss": 0.7098, "step": 8762, "task_loss": 1.0512365102767944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.804335355758667, "epoch": 7.41, "learning_rate": 1.4403118249272096e-05, "loss": 0.6179, "step": 8763, "task_loss": 1.4140928983688354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47770431637763977, "epoch": 7.41, "learning_rate": 1.4398422090729784e-05, "loss": 0.7003, "step": 8764, "task_loss": 0.5422632694244385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5327218770980835, "epoch": 7.41, "learning_rate": 1.439372593218747e-05, "loss": 0.7292, "step": 8765, "task_loss": 0.32303598523139954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8065009713172913, "epoch": 7.41, "learning_rate": 1.438902977364516e-05, "loss": 0.7197, "step": 8766, "task_loss": 1.19415283203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7318686246871948, "epoch": 7.41, "learning_rate": 1.4384333615102847e-05, "loss": 0.6351, "step": 8767, "task_loss": 0.8286168575286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3391219675540924, "epoch": 7.41, "learning_rate": 1.4379637456560535e-05, "loss": 0.627, "step": 8768, "task_loss": 0.5470843315124512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6095497608184814, "epoch": 7.41, "learning_rate": 1.4374941298018221e-05, "loss": 0.5929, "step": 8769, "task_loss": 0.27946048974990845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4815196990966797, "epoch": 7.41, "learning_rate": 1.4370245139475911e-05, "loss": 0.5957, "step": 8770, "task_loss": 0.2822089195251465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3761410117149353, "epoch": 7.41, "learning_rate": 1.4365548980933596e-05, "loss": 0.548, "step": 8771, "task_loss": 0.4711951017379761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4949633479118347, "epoch": 7.41, "learning_rate": 1.4360852822391285e-05, "loss": 0.5927, "step": 8772, "task_loss": 1.4139137268066406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5867195129394531, "epoch": 7.42, "learning_rate": 1.4356156663848972e-05, "loss": 0.5805, "step": 8773, "task_loss": 0.3959139287471771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8065346479415894, "epoch": 7.42, "learning_rate": 1.435146050530666e-05, "loss": 0.7246, "step": 8774, "task_loss": 0.556391179561615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49811840057373047, "epoch": 7.42, "learning_rate": 1.4346764346764346e-05, "loss": 0.7105, "step": 8775, "task_loss": 0.1544678509235382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7989290952682495, "epoch": 7.42, "learning_rate": 1.4342068188222036e-05, "loss": 0.6189, "step": 8776, "task_loss": 1.0833930969238281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3572825491428375, "epoch": 7.42, "learning_rate": 1.4337372029679722e-05, "loss": 0.5584, "step": 8777, "task_loss": 1.1416620016098022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7293670177459717, "epoch": 7.42, "learning_rate": 1.433267587113741e-05, "loss": 0.7864, "step": 8778, "task_loss": 0.827947199344635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4423123896121979, "epoch": 7.42, "learning_rate": 1.4327979712595097e-05, "loss": 0.4323, "step": 8779, "task_loss": 0.19232335686683655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6435209512710571, "epoch": 7.42, "learning_rate": 1.4323283554052786e-05, "loss": 0.6674, "step": 8780, "task_loss": 0.5760380625724792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6504685878753662, "epoch": 7.42, "learning_rate": 1.4318587395510474e-05, "loss": 0.6286, "step": 8781, "task_loss": 0.3659413754940033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8599176406860352, "epoch": 7.42, "learning_rate": 1.431389123696816e-05, "loss": 0.6315, "step": 8782, "task_loss": 0.7786250114440918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33984851837158203, "epoch": 7.42, "learning_rate": 1.4309195078425849e-05, "loss": 0.6134, "step": 8783, "task_loss": 0.3606584668159485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5017727017402649, "epoch": 7.42, "learning_rate": 1.4304498919883535e-05, "loss": 0.5581, "step": 8784, "task_loss": 0.7546937465667725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.548828125, "epoch": 7.43, "learning_rate": 1.4299802761341225e-05, "loss": 0.7064, "step": 8785, "task_loss": 0.6197872161865234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49103614687919617, "epoch": 7.43, "learning_rate": 1.4295106602798911e-05, "loss": 0.6121, "step": 8786, "task_loss": 0.7425150275230408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0072267055511475, "epoch": 7.43, "learning_rate": 1.4290410444256599e-05, "loss": 0.7122, "step": 8787, "task_loss": 0.9671074748039246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31548112630844116, "epoch": 7.43, "learning_rate": 1.4285714285714285e-05, "loss": 0.6613, "step": 8788, "task_loss": 0.33679962158203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6291132569313049, "epoch": 7.43, "learning_rate": 1.4281018127171975e-05, "loss": 0.5602, "step": 8789, "task_loss": 0.7900794744491577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3461541533470154, "epoch": 7.43, "learning_rate": 1.427632196862966e-05, "loss": 0.5664, "step": 8790, "task_loss": 0.30411767959594727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3664182722568512, "epoch": 7.43, "learning_rate": 1.427162581008735e-05, "loss": 0.4951, "step": 8791, "task_loss": 0.32132384181022644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7198801636695862, "epoch": 7.43, "learning_rate": 1.4266929651545036e-05, "loss": 0.5281, "step": 8792, "task_loss": 0.862074077129364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5497862100601196, "epoch": 7.43, "learning_rate": 1.4262233493002724e-05, "loss": 0.5627, "step": 8793, "task_loss": 0.19390201568603516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48183682560920715, "epoch": 7.43, "learning_rate": 1.425753733446041e-05, "loss": 0.628, "step": 8794, "task_loss": 0.9020749926567078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7595188617706299, "epoch": 7.43, "learning_rate": 1.42528411759181e-05, "loss": 0.6188, "step": 8795, "task_loss": 1.024613380432129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37002331018447876, "epoch": 7.44, "learning_rate": 1.4248145017375788e-05, "loss": 0.6006, "step": 8796, "task_loss": 0.22668834030628204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6443023681640625, "epoch": 7.44, "learning_rate": 1.4243448858833474e-05, "loss": 0.5524, "step": 8797, "task_loss": 0.18433637917041779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5004959106445312, "epoch": 7.44, "learning_rate": 1.4238752700291164e-05, "loss": 0.6531, "step": 8798, "task_loss": 0.0589759387075901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4933279752731323, "epoch": 7.44, "learning_rate": 1.423405654174885e-05, "loss": 0.5664, "step": 8799, "task_loss": 0.8993399739265442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42491069436073303, "epoch": 7.44, "learning_rate": 1.4229360383206538e-05, "loss": 0.5285, "step": 8800, "task_loss": 0.3228122889995575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.5442649126052856, "epoch": 7.44, "learning_rate": 1.4224664224664225e-05, "loss": 0.9703, "step": 8801, "task_loss": 1.9371752738952637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8439143896102905, "epoch": 7.44, "learning_rate": 1.4219968066121915e-05, "loss": 0.9486, "step": 8802, "task_loss": 1.6801105737686157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4760844111442566, "epoch": 7.44, "learning_rate": 1.42152719075796e-05, "loss": 0.4787, "step": 8803, "task_loss": 0.5227742791175842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.818293571472168, "epoch": 7.44, "learning_rate": 1.4210575749037289e-05, "loss": 0.6348, "step": 8804, "task_loss": 0.45043841004371643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3549439013004303, "epoch": 7.44, "learning_rate": 1.4205879590494975e-05, "loss": 0.4733, "step": 8805, "task_loss": 0.44276851415634155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.326800137758255, "epoch": 7.44, "learning_rate": 1.4201183431952663e-05, "loss": 0.66, "step": 8806, "task_loss": 0.6556340456008911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9696972370147705, "epoch": 7.44, "learning_rate": 1.419648727341035e-05, "loss": 0.6379, "step": 8807, "task_loss": 0.7148184180259705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6049702167510986, "epoch": 7.45, "learning_rate": 1.419179111486804e-05, "loss": 0.7258, "step": 8808, "task_loss": 1.0771348476409912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.597672164440155, "epoch": 7.45, "learning_rate": 1.4187094956325726e-05, "loss": 0.534, "step": 8809, "task_loss": 0.531308650970459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4485315680503845, "epoch": 7.45, "learning_rate": 1.4182398797783414e-05, "loss": 0.6665, "step": 8810, "task_loss": 0.23535816371440887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3912915885448456, "epoch": 7.45, "learning_rate": 1.4177702639241103e-05, "loss": 0.538, "step": 8811, "task_loss": 0.37026292085647583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9644006490707397, "epoch": 7.45, "learning_rate": 1.4173006480698788e-05, "loss": 0.6826, "step": 8812, "task_loss": 1.3084909915924072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1315717697143555, "epoch": 7.45, "learning_rate": 1.4168310322156478e-05, "loss": 0.6782, "step": 8813, "task_loss": 1.4765267372131348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6742309927940369, "epoch": 7.45, "learning_rate": 1.4163614163614164e-05, "loss": 0.6789, "step": 8814, "task_loss": 1.3087055683135986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3168940246105194, "epoch": 7.45, "learning_rate": 1.4158918005071852e-05, "loss": 0.4886, "step": 8815, "task_loss": 0.7416024208068848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4222475588321686, "epoch": 7.45, "learning_rate": 1.4154221846529539e-05, "loss": 0.5256, "step": 8816, "task_loss": 0.7804904580116272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5222935676574707, "epoch": 7.45, "learning_rate": 1.4149525687987228e-05, "loss": 0.5864, "step": 8817, "task_loss": 1.4211033582687378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.798425555229187, "epoch": 7.45, "learning_rate": 1.4144829529444915e-05, "loss": 0.6485, "step": 8818, "task_loss": 1.1012928485870361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6438949108123779, "epoch": 7.45, "learning_rate": 1.4140133370902603e-05, "loss": 0.7879, "step": 8819, "task_loss": 0.5425367951393127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5253055691719055, "epoch": 7.46, "learning_rate": 1.4135437212360289e-05, "loss": 0.4993, "step": 8820, "task_loss": 0.32584404945373535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26584291458129883, "epoch": 7.46, "learning_rate": 1.4130741053817979e-05, "loss": 0.5878, "step": 8821, "task_loss": 0.5148237347602844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7537709474563599, "epoch": 7.46, "learning_rate": 1.4126044895275663e-05, "loss": 0.6532, "step": 8822, "task_loss": 0.47954994440078735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7706807851791382, "epoch": 7.46, "learning_rate": 1.4121348736733353e-05, "loss": 0.5393, "step": 8823, "task_loss": 0.5625463724136353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5806363821029663, "epoch": 7.46, "learning_rate": 1.411665257819104e-05, "loss": 0.6051, "step": 8824, "task_loss": 1.3116568326950073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3402213156223297, "epoch": 7.46, "learning_rate": 1.4111956419648727e-05, "loss": 0.5458, "step": 8825, "task_loss": 1.0510114431381226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6571834087371826, "epoch": 7.46, "learning_rate": 1.4107260261106417e-05, "loss": 0.5708, "step": 8826, "task_loss": 0.7167559266090393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.807671070098877, "epoch": 7.46, "learning_rate": 1.4102564102564104e-05, "loss": 0.6064, "step": 8827, "task_loss": 0.7960857152938843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5515429377555847, "epoch": 7.46, "learning_rate": 1.4097867944021792e-05, "loss": 0.558, "step": 8828, "task_loss": 0.21235883235931396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9912184476852417, "epoch": 7.46, "learning_rate": 1.4093171785479478e-05, "loss": 0.7407, "step": 8829, "task_loss": 0.48920032382011414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9062431454658508, "epoch": 7.46, "learning_rate": 1.4088475626937168e-05, "loss": 0.6931, "step": 8830, "task_loss": 1.5853805541992188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6432632803916931, "epoch": 7.46, "learning_rate": 1.4083779468394854e-05, "loss": 0.6483, "step": 8831, "task_loss": 0.49787449836730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29538118839263916, "epoch": 7.47, "learning_rate": 1.4079083309852542e-05, "loss": 0.7014, "step": 8832, "task_loss": 0.07086777687072754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8068115711212158, "epoch": 7.47, "learning_rate": 1.4074387151310228e-05, "loss": 0.5608, "step": 8833, "task_loss": 0.9117980599403381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5897228121757507, "epoch": 7.47, "learning_rate": 1.4069690992767916e-05, "loss": 0.604, "step": 8834, "task_loss": 0.7965760231018066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2771298885345459, "epoch": 7.47, "learning_rate": 1.4064994834225603e-05, "loss": 0.4351, "step": 8835, "task_loss": 0.4141058623790741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6139661073684692, "epoch": 7.47, "learning_rate": 1.4060298675683292e-05, "loss": 0.5701, "step": 8836, "task_loss": 0.789925217628479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9174843430519104, "epoch": 7.47, "learning_rate": 1.4055602517140979e-05, "loss": 0.7744, "step": 8837, "task_loss": 0.4499000906944275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9140945672988892, "epoch": 7.47, "learning_rate": 1.4050906358598667e-05, "loss": 0.7063, "step": 8838, "task_loss": 0.9110105633735657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5736308097839355, "epoch": 7.47, "learning_rate": 1.4046210200056353e-05, "loss": 0.4752, "step": 8839, "task_loss": 0.8866455554962158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.578416109085083, "epoch": 7.47, "learning_rate": 1.4041514041514043e-05, "loss": 0.4165, "step": 8840, "task_loss": 0.3861362040042877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7147908210754395, "epoch": 7.47, "learning_rate": 1.4036817882971731e-05, "loss": 0.6751, "step": 8841, "task_loss": 1.0191805362701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44898921251296997, "epoch": 7.47, "learning_rate": 1.4032121724429417e-05, "loss": 0.556, "step": 8842, "task_loss": 1.380510926246643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0752756595611572, "epoch": 7.47, "learning_rate": 1.4027425565887107e-05, "loss": 0.8416, "step": 8843, "task_loss": 1.670526385307312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.682672917842865, "epoch": 7.48, "learning_rate": 1.4022729407344792e-05, "loss": 0.7839, "step": 8844, "task_loss": 1.6672682762145996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6933287382125854, "epoch": 7.48, "learning_rate": 1.4018033248802481e-05, "loss": 0.6687, "step": 8845, "task_loss": 0.6702417731285095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27506935596466064, "epoch": 7.48, "learning_rate": 1.4013337090260168e-05, "loss": 0.6482, "step": 8846, "task_loss": 0.8135870695114136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7129396200180054, "epoch": 7.48, "learning_rate": 1.4008640931717856e-05, "loss": 0.6735, "step": 8847, "task_loss": 0.9490268230438232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6656155586242676, "epoch": 7.48, "learning_rate": 1.4003944773175542e-05, "loss": 0.6641, "step": 8848, "task_loss": 1.1046116352081299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46516433358192444, "epoch": 7.48, "learning_rate": 1.3999248614633232e-05, "loss": 0.6792, "step": 8849, "task_loss": 0.4096527099609375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7510248422622681, "epoch": 7.48, "learning_rate": 1.3994552456090918e-05, "loss": 0.6332, "step": 8850, "task_loss": 0.6365059018135071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5387401580810547, "epoch": 7.48, "learning_rate": 1.3989856297548606e-05, "loss": 0.6242, "step": 8851, "task_loss": 0.8759180903434753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48016858100891113, "epoch": 7.48, "learning_rate": 1.3985160139006292e-05, "loss": 0.4592, "step": 8852, "task_loss": 0.7282658815383911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6573927998542786, "epoch": 7.48, "learning_rate": 1.3980463980463982e-05, "loss": 0.6056, "step": 8853, "task_loss": 0.66485595703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3277650475502014, "epoch": 7.48, "learning_rate": 1.3975767821921667e-05, "loss": 0.5259, "step": 8854, "task_loss": 0.29656070470809937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6897794008255005, "epoch": 7.48, "learning_rate": 1.3971071663379357e-05, "loss": 0.6865, "step": 8855, "task_loss": 0.8608306646347046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7923409938812256, "epoch": 7.49, "learning_rate": 1.3966375504837043e-05, "loss": 0.596, "step": 8856, "task_loss": 0.743945300579071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49515748023986816, "epoch": 7.49, "learning_rate": 1.3961679346294731e-05, "loss": 0.4291, "step": 8857, "task_loss": 0.5723106861114502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2897806167602539, "epoch": 7.49, "learning_rate": 1.395698318775242e-05, "loss": 0.543, "step": 8858, "task_loss": 0.4049126207828522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42775845527648926, "epoch": 7.49, "learning_rate": 1.3952287029210107e-05, "loss": 0.5669, "step": 8859, "task_loss": 0.25260016322135925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5343738794326782, "epoch": 7.49, "learning_rate": 1.3947590870667795e-05, "loss": 0.7521, "step": 8860, "task_loss": 0.8195265531539917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7825603485107422, "epoch": 7.49, "learning_rate": 1.3942894712125481e-05, "loss": 0.482, "step": 8861, "task_loss": 0.6050991415977478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7811604738235474, "epoch": 7.49, "learning_rate": 1.3938198553583171e-05, "loss": 0.765, "step": 8862, "task_loss": 1.0392000675201416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5027352571487427, "epoch": 7.49, "learning_rate": 1.3933502395040857e-05, "loss": 0.6916, "step": 8863, "task_loss": 1.0449659824371338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7661229968070984, "epoch": 7.49, "learning_rate": 1.3928806236498546e-05, "loss": 0.5333, "step": 8864, "task_loss": 0.2767099440097809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30622315406799316, "epoch": 7.49, "learning_rate": 1.3924110077956232e-05, "loss": 0.6944, "step": 8865, "task_loss": 0.08167361468076706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.420692503452301, "epoch": 7.49, "learning_rate": 1.391941391941392e-05, "loss": 0.5889, "step": 8866, "task_loss": 0.5782320499420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7817936539649963, "epoch": 7.5, "learning_rate": 1.3914717760871606e-05, "loss": 0.675, "step": 8867, "task_loss": 0.46111050248146057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7293233871459961, "epoch": 7.5, "learning_rate": 1.3910021602329296e-05, "loss": 0.5758, "step": 8868, "task_loss": 1.5446324348449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6156920194625854, "epoch": 7.5, "learning_rate": 1.3905325443786982e-05, "loss": 0.6627, "step": 8869, "task_loss": 0.3688816726207733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6494438648223877, "epoch": 7.5, "learning_rate": 1.390062928524467e-05, "loss": 0.6045, "step": 8870, "task_loss": 0.8621974587440491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44929176568984985, "epoch": 7.5, "learning_rate": 1.3895933126702357e-05, "loss": 0.5614, "step": 8871, "task_loss": 0.30997002124786377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3734612464904785, "epoch": 7.5, "learning_rate": 1.3891236968160046e-05, "loss": 0.5195, "step": 8872, "task_loss": 0.38872459530830383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47571858763694763, "epoch": 7.5, "learning_rate": 1.3886540809617734e-05, "loss": 0.5643, "step": 8873, "task_loss": 0.7035385966300964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6912674307823181, "epoch": 7.5, "learning_rate": 1.388184465107542e-05, "loss": 0.747, "step": 8874, "task_loss": 1.3268496990203857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24334104359149933, "epoch": 7.5, "learning_rate": 1.387714849253311e-05, "loss": 0.5415, "step": 8875, "task_loss": 0.03859691694378853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36318013072013855, "epoch": 7.5, "learning_rate": 1.3872452333990795e-05, "loss": 0.608, "step": 8876, "task_loss": 0.20136430859565735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5649789571762085, "epoch": 7.5, "learning_rate": 1.3867756175448485e-05, "loss": 0.7508, "step": 8877, "task_loss": 0.662899911403656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5768246650695801, "epoch": 7.5, "learning_rate": 1.3863060016906171e-05, "loss": 0.558, "step": 8878, "task_loss": 0.1976538747549057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5126442313194275, "epoch": 7.51, "learning_rate": 1.385836385836386e-05, "loss": 0.5641, "step": 8879, "task_loss": 0.3186753988265991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3110091984272003, "epoch": 7.51, "learning_rate": 1.3853667699821546e-05, "loss": 0.5947, "step": 8880, "task_loss": 0.7356928586959839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5401023030281067, "epoch": 7.51, "learning_rate": 1.3848971541279235e-05, "loss": 0.4636, "step": 8881, "task_loss": 0.6640371680259705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49347123503685, "epoch": 7.51, "learning_rate": 1.3844275382736922e-05, "loss": 0.9608, "step": 8882, "task_loss": 0.2183755785226822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5438917875289917, "epoch": 7.51, "learning_rate": 1.383957922419461e-05, "loss": 0.5896, "step": 8883, "task_loss": 0.7835413813591003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6133314371109009, "epoch": 7.51, "learning_rate": 1.3834883065652296e-05, "loss": 0.6516, "step": 8884, "task_loss": 0.8716408014297485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5653834939002991, "epoch": 7.51, "learning_rate": 1.3830186907109984e-05, "loss": 0.4566, "step": 8885, "task_loss": 0.7486401200294495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3579946756362915, "epoch": 7.51, "learning_rate": 1.382549074856767e-05, "loss": 0.496, "step": 8886, "task_loss": 1.083166241645813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4328920245170593, "epoch": 7.51, "learning_rate": 1.382079459002536e-05, "loss": 0.6986, "step": 8887, "task_loss": 0.26051589846611023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6244134306907654, "epoch": 7.51, "learning_rate": 1.3816098431483048e-05, "loss": 0.5116, "step": 8888, "task_loss": 0.9129281640052795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28497618436813354, "epoch": 7.51, "learning_rate": 1.3811402272940734e-05, "loss": 0.5349, "step": 8889, "task_loss": 0.18349602818489075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7336397171020508, "epoch": 7.51, "learning_rate": 1.3806706114398424e-05, "loss": 0.5409, "step": 8890, "task_loss": 0.514167070388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38443365693092346, "epoch": 7.52, "learning_rate": 1.380200995585611e-05, "loss": 0.5755, "step": 8891, "task_loss": 0.301582396030426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6771820783615112, "epoch": 7.52, "learning_rate": 1.3797313797313799e-05, "loss": 0.5768, "step": 8892, "task_loss": 0.17118817567825317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.621345043182373, "epoch": 7.52, "learning_rate": 1.3792617638771485e-05, "loss": 0.5382, "step": 8893, "task_loss": 0.891568124294281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48554375767707825, "epoch": 7.52, "learning_rate": 1.3787921480229175e-05, "loss": 0.5333, "step": 8894, "task_loss": 0.621101975440979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7984272241592407, "epoch": 7.52, "learning_rate": 1.378322532168686e-05, "loss": 0.8557, "step": 8895, "task_loss": 0.8120532631874084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7631176710128784, "epoch": 7.52, "learning_rate": 1.3778529163144549e-05, "loss": 0.5834, "step": 8896, "task_loss": 0.6807193756103516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7292428016662598, "epoch": 7.52, "learning_rate": 1.3773833004602235e-05, "loss": 0.5496, "step": 8897, "task_loss": 0.5467071533203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47873324155807495, "epoch": 7.52, "learning_rate": 1.3769136846059923e-05, "loss": 0.4531, "step": 8898, "task_loss": 0.9652959704399109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6476796865463257, "epoch": 7.52, "learning_rate": 1.376444068751761e-05, "loss": 0.6889, "step": 8899, "task_loss": 0.9624375700950623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5519528388977051, "epoch": 7.52, "learning_rate": 1.37597445289753e-05, "loss": 0.4843, "step": 8900, "task_loss": 1.1023050546646118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9998469352722168, "epoch": 7.52, "learning_rate": 1.3755048370432986e-05, "loss": 0.6867, "step": 8901, "task_loss": 0.7491784691810608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6818729639053345, "epoch": 7.52, "learning_rate": 1.3750352211890674e-05, "loss": 0.5012, "step": 8902, "task_loss": 0.9609502553939819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5019899010658264, "epoch": 7.53, "learning_rate": 1.3745656053348364e-05, "loss": 0.5656, "step": 8903, "task_loss": 0.6448581218719482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3940691649913788, "epoch": 7.53, "learning_rate": 1.374095989480605e-05, "loss": 0.5566, "step": 8904, "task_loss": 0.6520074605941772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5160630941390991, "epoch": 7.53, "learning_rate": 1.3736263736263738e-05, "loss": 0.6013, "step": 8905, "task_loss": 0.327972948551178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46807482838630676, "epoch": 7.53, "learning_rate": 1.3731567577721424e-05, "loss": 0.6382, "step": 8906, "task_loss": 0.2665335237979889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6166424751281738, "epoch": 7.53, "learning_rate": 1.3726871419179112e-05, "loss": 0.6259, "step": 8907, "task_loss": 0.6120152473449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3233594298362732, "epoch": 7.53, "learning_rate": 1.3722175260636799e-05, "loss": 0.5689, "step": 8908, "task_loss": 0.23141354322433472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29720431566238403, "epoch": 7.53, "learning_rate": 1.3717479102094488e-05, "loss": 0.5696, "step": 8909, "task_loss": 0.38837534189224243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35013657808303833, "epoch": 7.53, "learning_rate": 1.3712782943552175e-05, "loss": 0.4565, "step": 8910, "task_loss": 0.2290128469467163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3914502263069153, "epoch": 7.53, "learning_rate": 1.3708086785009863e-05, "loss": 0.4772, "step": 8911, "task_loss": 0.7372626662254333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9474555253982544, "epoch": 7.53, "learning_rate": 1.3703390626467549e-05, "loss": 0.6657, "step": 8912, "task_loss": 0.8762913942337036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43768516182899475, "epoch": 7.53, "learning_rate": 1.3698694467925239e-05, "loss": 0.598, "step": 8913, "task_loss": 1.4424713850021362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5182085037231445, "epoch": 7.53, "learning_rate": 1.3693998309382925e-05, "loss": 0.7094, "step": 8914, "task_loss": 0.4989006519317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48615866899490356, "epoch": 7.54, "learning_rate": 1.3689302150840613e-05, "loss": 0.595, "step": 8915, "task_loss": 0.925879955291748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6263903379440308, "epoch": 7.54, "learning_rate": 1.36846059922983e-05, "loss": 0.4529, "step": 8916, "task_loss": 0.6599953770637512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7200750708580017, "epoch": 7.54, "learning_rate": 1.3679909833755988e-05, "loss": 0.616, "step": 8917, "task_loss": 0.7497655749320984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5484851598739624, "epoch": 7.54, "learning_rate": 1.3675213675213677e-05, "loss": 0.623, "step": 8918, "task_loss": 0.6548735499382019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38410329818725586, "epoch": 7.54, "learning_rate": 1.3670517516671364e-05, "loss": 0.6769, "step": 8919, "task_loss": 0.7605826258659363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8886547684669495, "epoch": 7.54, "learning_rate": 1.3665821358129052e-05, "loss": 0.6798, "step": 8920, "task_loss": 1.4036505222320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6687399744987488, "epoch": 7.54, "learning_rate": 1.3661125199586738e-05, "loss": 0.7852, "step": 8921, "task_loss": 1.1702030897140503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46676695346832275, "epoch": 7.54, "learning_rate": 1.3656429041044428e-05, "loss": 0.3971, "step": 8922, "task_loss": 1.0376129150390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.175830364227295, "epoch": 7.54, "learning_rate": 1.3651732882502114e-05, "loss": 0.746, "step": 8923, "task_loss": 0.9451988339424133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32247814536094666, "epoch": 7.54, "learning_rate": 1.3647036723959802e-05, "loss": 0.6082, "step": 8924, "task_loss": 0.6717820167541504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9461876749992371, "epoch": 7.54, "learning_rate": 1.3642340565417488e-05, "loss": 0.6594, "step": 8925, "task_loss": 0.8213331699371338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5674440860748291, "epoch": 7.54, "learning_rate": 1.3637644406875178e-05, "loss": 0.5767, "step": 8926, "task_loss": 0.8875153064727783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7224075794219971, "epoch": 7.55, "learning_rate": 1.3632948248332863e-05, "loss": 0.6605, "step": 8927, "task_loss": 0.6478766202926636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5364771485328674, "epoch": 7.55, "learning_rate": 1.3628252089790553e-05, "loss": 0.6028, "step": 8928, "task_loss": 0.6891406774520874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6196208596229553, "epoch": 7.55, "learning_rate": 1.3623555931248239e-05, "loss": 0.6297, "step": 8929, "task_loss": 0.6091980934143066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7143288850784302, "epoch": 7.55, "learning_rate": 1.3618859772705927e-05, "loss": 0.6823, "step": 8930, "task_loss": 1.248855471611023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44823336601257324, "epoch": 7.55, "learning_rate": 1.3614163614163613e-05, "loss": 0.501, "step": 8931, "task_loss": 0.35954445600509644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6563115119934082, "epoch": 7.55, "learning_rate": 1.3609467455621303e-05, "loss": 0.6876, "step": 8932, "task_loss": 0.7410834431648254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.568601131439209, "epoch": 7.55, "learning_rate": 1.360477129707899e-05, "loss": 0.624, "step": 8933, "task_loss": 0.8046369552612305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5449913740158081, "epoch": 7.55, "learning_rate": 1.3600075138536677e-05, "loss": 0.5722, "step": 8934, "task_loss": 0.7541571259498596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8767331838607788, "epoch": 7.55, "learning_rate": 1.3595378979994367e-05, "loss": 0.5985, "step": 8935, "task_loss": 1.4879505634307861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5620641708374023, "epoch": 7.55, "learning_rate": 1.3590682821452053e-05, "loss": 0.6724, "step": 8936, "task_loss": 0.33686888217926025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5212889909744263, "epoch": 7.55, "learning_rate": 1.3585986662909741e-05, "loss": 0.4462, "step": 8937, "task_loss": 0.26395881175994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9156762957572937, "epoch": 7.56, "learning_rate": 1.3581290504367428e-05, "loss": 0.6454, "step": 8938, "task_loss": 0.7454051971435547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49124711751937866, "epoch": 7.56, "learning_rate": 1.3576594345825116e-05, "loss": 0.7858, "step": 8939, "task_loss": 0.36607271432876587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.491756796836853, "epoch": 7.56, "learning_rate": 1.3571898187282802e-05, "loss": 0.6767, "step": 8940, "task_loss": 0.5824946165084839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43414807319641113, "epoch": 7.56, "learning_rate": 1.3567202028740492e-05, "loss": 0.464, "step": 8941, "task_loss": 0.5725322365760803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4916403889656067, "epoch": 7.56, "learning_rate": 1.3562505870198178e-05, "loss": 0.5112, "step": 8942, "task_loss": 0.6741717457771301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5848987698554993, "epoch": 7.56, "learning_rate": 1.3557809711655866e-05, "loss": 0.5424, "step": 8943, "task_loss": 0.9367419481277466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5563918352127075, "epoch": 7.56, "learning_rate": 1.3553113553113553e-05, "loss": 0.512, "step": 8944, "task_loss": 1.1880347728729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47725293040275574, "epoch": 7.56, "learning_rate": 1.3548417394571242e-05, "loss": 0.5937, "step": 8945, "task_loss": 0.6150954961776733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7626391649246216, "epoch": 7.56, "learning_rate": 1.3543721236028927e-05, "loss": 0.5539, "step": 8946, "task_loss": 1.7414627075195312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5861595869064331, "epoch": 7.56, "learning_rate": 1.3539025077486617e-05, "loss": 0.6792, "step": 8947, "task_loss": 0.2651851773262024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4677463471889496, "epoch": 7.56, "learning_rate": 1.3534328918944303e-05, "loss": 0.5616, "step": 8948, "task_loss": 0.5399291515350342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2978682816028595, "epoch": 7.56, "learning_rate": 1.3529632760401991e-05, "loss": 0.4818, "step": 8949, "task_loss": 0.08552386611700058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5679190158843994, "epoch": 7.57, "learning_rate": 1.352493660185968e-05, "loss": 0.6882, "step": 8950, "task_loss": 1.026499629020691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.713654100894928, "epoch": 7.57, "learning_rate": 1.3520240443317367e-05, "loss": 0.6719, "step": 8951, "task_loss": 1.5975486040115356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2998959720134735, "epoch": 7.57, "learning_rate": 1.3515544284775055e-05, "loss": 0.511, "step": 8952, "task_loss": 0.876070499420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8697065114974976, "epoch": 7.57, "learning_rate": 1.3510848126232742e-05, "loss": 0.6227, "step": 8953, "task_loss": 0.660861611366272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5706280469894409, "epoch": 7.57, "learning_rate": 1.3506151967690431e-05, "loss": 0.6324, "step": 8954, "task_loss": 1.2270126342773438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42725688219070435, "epoch": 7.57, "learning_rate": 1.3501455809148118e-05, "loss": 0.5597, "step": 8955, "task_loss": 1.181758999824524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7580904960632324, "epoch": 7.57, "learning_rate": 1.3496759650605806e-05, "loss": 0.5905, "step": 8956, "task_loss": 0.7803485989570618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5224002003669739, "epoch": 7.57, "learning_rate": 1.3492063492063492e-05, "loss": 0.6784, "step": 8957, "task_loss": 0.6149889826774597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.18703778088092804, "epoch": 7.57, "learning_rate": 1.3487367333521182e-05, "loss": 0.5966, "step": 8958, "task_loss": 0.3394901752471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5053856372833252, "epoch": 7.57, "learning_rate": 1.3482671174978866e-05, "loss": 0.5855, "step": 8959, "task_loss": 0.5284372568130493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5106124877929688, "epoch": 7.57, "learning_rate": 1.3477975016436556e-05, "loss": 0.5412, "step": 8960, "task_loss": 0.6728070974349976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7528073787689209, "epoch": 7.57, "learning_rate": 1.3473278857894242e-05, "loss": 0.677, "step": 8961, "task_loss": 0.5724266171455383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31978046894073486, "epoch": 7.58, "learning_rate": 1.346858269935193e-05, "loss": 0.4807, "step": 8962, "task_loss": 0.46166303753852844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7994702458381653, "epoch": 7.58, "learning_rate": 1.3463886540809617e-05, "loss": 0.6427, "step": 8963, "task_loss": 1.0828090906143188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6487932801246643, "epoch": 7.58, "learning_rate": 1.3459190382267307e-05, "loss": 0.5071, "step": 8964, "task_loss": 0.5249520540237427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.663418173789978, "epoch": 7.58, "learning_rate": 1.3454494223724995e-05, "loss": 0.58, "step": 8965, "task_loss": 0.4233308732509613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.636496365070343, "epoch": 7.58, "learning_rate": 1.3449798065182681e-05, "loss": 0.478, "step": 8966, "task_loss": 0.6438242793083191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.871090292930603, "epoch": 7.58, "learning_rate": 1.344510190664037e-05, "loss": 0.5839, "step": 8967, "task_loss": 0.945065438747406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2805964946746826, "epoch": 7.58, "learning_rate": 1.3440405748098055e-05, "loss": 0.9304, "step": 8968, "task_loss": 0.8377017378807068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6269828081130981, "epoch": 7.58, "learning_rate": 1.3435709589555745e-05, "loss": 0.7213, "step": 8969, "task_loss": 0.2992476522922516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37438035011291504, "epoch": 7.58, "learning_rate": 1.3431013431013431e-05, "loss": 0.5837, "step": 8970, "task_loss": 0.4533587098121643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.17088253796100616, "epoch": 7.58, "learning_rate": 1.342631727247112e-05, "loss": 0.4618, "step": 8971, "task_loss": 0.006585790775716305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49435898661613464, "epoch": 7.58, "learning_rate": 1.3421621113928806e-05, "loss": 0.6454, "step": 8972, "task_loss": 0.8248806595802307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6154981255531311, "epoch": 7.58, "learning_rate": 1.3416924955386495e-05, "loss": 0.5523, "step": 8973, "task_loss": 0.5079376697540283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33724647760391235, "epoch": 7.59, "learning_rate": 1.3412228796844182e-05, "loss": 0.4964, "step": 8974, "task_loss": 0.06151442602276802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2564454078674316, "epoch": 7.59, "learning_rate": 1.340753263830187e-05, "loss": 0.8558, "step": 8975, "task_loss": 0.5532498359680176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6405106782913208, "epoch": 7.59, "learning_rate": 1.3402836479759556e-05, "loss": 0.7577, "step": 8976, "task_loss": 0.8192280530929565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2803642451763153, "epoch": 7.59, "learning_rate": 1.3398140321217246e-05, "loss": 0.6088, "step": 8977, "task_loss": 0.42502883076667786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6217877268791199, "epoch": 7.59, "learning_rate": 1.339344416267493e-05, "loss": 0.5847, "step": 8978, "task_loss": 0.8711643218994141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5532035827636719, "epoch": 7.59, "learning_rate": 1.338874800413262e-05, "loss": 0.7135, "step": 8979, "task_loss": 0.8165691494941711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3714231252670288, "epoch": 7.59, "learning_rate": 1.3384051845590308e-05, "loss": 0.5108, "step": 8980, "task_loss": 0.28244656324386597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4669484794139862, "epoch": 7.59, "learning_rate": 1.3379355687047995e-05, "loss": 0.4649, "step": 8981, "task_loss": 0.42589709162712097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5668807029724121, "epoch": 7.59, "learning_rate": 1.3374659528505684e-05, "loss": 0.7024, "step": 8982, "task_loss": 0.5923916697502136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7379693388938904, "epoch": 7.59, "learning_rate": 1.336996336996337e-05, "loss": 0.6037, "step": 8983, "task_loss": 0.5287562608718872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8053090572357178, "epoch": 7.59, "learning_rate": 1.3365267211421059e-05, "loss": 0.6418, "step": 8984, "task_loss": 1.2072690725326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5498437881469727, "epoch": 7.59, "learning_rate": 1.3360571052878745e-05, "loss": 0.5142, "step": 8985, "task_loss": 0.42125147581100464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9330036640167236, "epoch": 7.6, "learning_rate": 1.3355874894336435e-05, "loss": 0.6433, "step": 8986, "task_loss": 0.4687292277812958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3541223406791687, "epoch": 7.6, "learning_rate": 1.3351178735794121e-05, "loss": 0.5817, "step": 8987, "task_loss": 0.3544537127017975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45443403720855713, "epoch": 7.6, "learning_rate": 1.334648257725181e-05, "loss": 0.6995, "step": 8988, "task_loss": 0.7954627275466919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48673170804977417, "epoch": 7.6, "learning_rate": 1.3341786418709496e-05, "loss": 0.612, "step": 8989, "task_loss": 0.5954718589782715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6466262936592102, "epoch": 7.6, "learning_rate": 1.3337090260167184e-05, "loss": 0.5694, "step": 8990, "task_loss": 0.8541258573532104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.555175244808197, "epoch": 7.6, "learning_rate": 1.333239410162487e-05, "loss": 0.677, "step": 8991, "task_loss": 0.8508421778678894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6556372046470642, "epoch": 7.6, "learning_rate": 1.332769794308256e-05, "loss": 0.4105, "step": 8992, "task_loss": 0.17157778143882751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49563688039779663, "epoch": 7.6, "learning_rate": 1.3323001784540246e-05, "loss": 0.4823, "step": 8993, "task_loss": 0.5375559329986572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6077665090560913, "epoch": 7.6, "learning_rate": 1.3318305625997934e-05, "loss": 0.6284, "step": 8994, "task_loss": 0.6583684682846069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1787350177764893, "epoch": 7.6, "learning_rate": 1.3313609467455624e-05, "loss": 0.7695, "step": 8995, "task_loss": 1.1200685501098633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2662813663482666, "epoch": 7.6, "learning_rate": 1.330891330891331e-05, "loss": 0.549, "step": 8996, "task_loss": 0.4938986301422119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3795223832130432, "epoch": 7.6, "learning_rate": 1.3304217150370998e-05, "loss": 0.6494, "step": 8997, "task_loss": 0.507544994354248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5890479683876038, "epoch": 7.61, "learning_rate": 1.3299520991828684e-05, "loss": 0.5108, "step": 8998, "task_loss": 0.44533658027648926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37492525577545166, "epoch": 7.61, "learning_rate": 1.3294824833286374e-05, "loss": 0.5103, "step": 8999, "task_loss": 0.42242175340652466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2821308672428131, "epoch": 7.61, "learning_rate": 1.3290128674744059e-05, "loss": 0.5102, "step": 9000, "task_loss": 0.23625454306602478 }, { "epoch": 7.61, "eval_accuracy": 0.9008712871287129, "eval_loss": 0.39106252789497375, "eval_runtime": 223.7994, "eval_samples_per_second": 112.824, "eval_steps_per_second": 0.885, "step": 9000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6240146160125732, "epoch": 7.61, "learning_rate": 1.3285432516201749e-05, "loss": 0.5538, "step": 9001, "task_loss": 0.9332427978515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7686625719070435, "epoch": 7.61, "learning_rate": 1.3280736357659435e-05, "loss": 0.5724, "step": 9002, "task_loss": 0.47290509939193726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5752962827682495, "epoch": 7.61, "learning_rate": 1.3276040199117123e-05, "loss": 0.6499, "step": 9003, "task_loss": 1.0762513875961304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7500045895576477, "epoch": 7.61, "learning_rate": 1.327134404057481e-05, "loss": 0.6265, "step": 9004, "task_loss": 0.840983510017395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3778141736984253, "epoch": 7.61, "learning_rate": 1.3266647882032499e-05, "loss": 0.438, "step": 9005, "task_loss": 0.13706931471824646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8083757162094116, "epoch": 7.61, "learning_rate": 1.3261951723490185e-05, "loss": 0.7386, "step": 9006, "task_loss": 0.4357898533344269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.089493751525879, "epoch": 7.61, "learning_rate": 1.3257255564947873e-05, "loss": 0.8888, "step": 9007, "task_loss": 1.0500649213790894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2269340455532074, "epoch": 7.61, "learning_rate": 1.325255940640556e-05, "loss": 0.4474, "step": 9008, "task_loss": 0.3588985800743103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6700595617294312, "epoch": 7.61, "learning_rate": 1.324786324786325e-05, "loss": 0.7189, "step": 9009, "task_loss": 0.6550379991531372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5407801270484924, "epoch": 7.62, "learning_rate": 1.3243167089320934e-05, "loss": 0.5565, "step": 9010, "task_loss": 1.023596167564392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47549933195114136, "epoch": 7.62, "learning_rate": 1.3238470930778624e-05, "loss": 0.549, "step": 9011, "task_loss": 1.0543586015701294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.915995717048645, "epoch": 7.62, "learning_rate": 1.3233774772236312e-05, "loss": 0.7489, "step": 9012, "task_loss": 1.4029484987258911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6430544853210449, "epoch": 7.62, "learning_rate": 1.3229078613693998e-05, "loss": 0.674, "step": 9013, "task_loss": 1.1995844841003418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8324033617973328, "epoch": 7.62, "learning_rate": 1.3224382455151688e-05, "loss": 0.6483, "step": 9014, "task_loss": 0.6361619234085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.721589207649231, "epoch": 7.62, "learning_rate": 1.3219686296609374e-05, "loss": 0.648, "step": 9015, "task_loss": 1.43796706199646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6169995069503784, "epoch": 7.62, "learning_rate": 1.3214990138067062e-05, "loss": 0.5317, "step": 9016, "task_loss": 1.1117411851882935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35782501101493835, "epoch": 7.62, "learning_rate": 1.3210293979524749e-05, "loss": 0.566, "step": 9017, "task_loss": 0.23737990856170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6313146352767944, "epoch": 7.62, "learning_rate": 1.3205597820982438e-05, "loss": 0.5801, "step": 9018, "task_loss": 0.5653353929519653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7927615642547607, "epoch": 7.62, "learning_rate": 1.3200901662440125e-05, "loss": 0.5608, "step": 9019, "task_loss": 0.9226319193840027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5235109329223633, "epoch": 7.62, "learning_rate": 1.3196205503897813e-05, "loss": 0.4633, "step": 9020, "task_loss": 0.5870667099952698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8240448236465454, "epoch": 7.63, "learning_rate": 1.3191509345355499e-05, "loss": 0.5719, "step": 9021, "task_loss": 1.351090908050537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.524917483329773, "epoch": 7.63, "learning_rate": 1.3186813186813187e-05, "loss": 0.6356, "step": 9022, "task_loss": 0.2918887734413147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9738847017288208, "epoch": 7.63, "learning_rate": 1.3182117028270873e-05, "loss": 0.6295, "step": 9023, "task_loss": 1.1196434497833252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4192851781845093, "epoch": 7.63, "learning_rate": 1.3177420869728563e-05, "loss": 0.6232, "step": 9024, "task_loss": 1.1888654232025146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7985727787017822, "epoch": 7.63, "learning_rate": 1.317272471118625e-05, "loss": 0.5935, "step": 9025, "task_loss": 0.7717889547348022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5766173601150513, "epoch": 7.63, "learning_rate": 1.3168028552643938e-05, "loss": 0.6369, "step": 9026, "task_loss": 0.996227502822876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45502209663391113, "epoch": 7.63, "learning_rate": 1.3163332394101627e-05, "loss": 0.8129, "step": 9027, "task_loss": 0.7908227443695068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45022112131118774, "epoch": 7.63, "learning_rate": 1.3158636235559314e-05, "loss": 0.6459, "step": 9028, "task_loss": 0.2278226912021637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6681723594665527, "epoch": 7.63, "learning_rate": 1.3153940077017002e-05, "loss": 0.7479, "step": 9029, "task_loss": 0.9272618293762207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5219153165817261, "epoch": 7.63, "learning_rate": 1.3149243918474688e-05, "loss": 0.5382, "step": 9030, "task_loss": 1.4203537702560425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3625550866127014, "epoch": 7.63, "learning_rate": 1.3144547759932378e-05, "loss": 0.5949, "step": 9031, "task_loss": 0.3922650218009949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5194550156593323, "epoch": 7.63, "learning_rate": 1.3139851601390062e-05, "loss": 0.7612, "step": 9032, "task_loss": 0.4504622519016266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5652658343315125, "epoch": 7.64, "learning_rate": 1.3135155442847752e-05, "loss": 0.6155, "step": 9033, "task_loss": 0.6804161071777344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30696597695350647, "epoch": 7.64, "learning_rate": 1.3130459284305438e-05, "loss": 0.5816, "step": 9034, "task_loss": 0.14603295922279358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5957750082015991, "epoch": 7.64, "learning_rate": 1.3125763125763126e-05, "loss": 0.6662, "step": 9035, "task_loss": 0.11182977259159088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5989912748336792, "epoch": 7.64, "learning_rate": 1.3121066967220813e-05, "loss": 0.5694, "step": 9036, "task_loss": 0.07786394655704498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6047494411468506, "epoch": 7.64, "learning_rate": 1.3116370808678502e-05, "loss": 0.5719, "step": 9037, "task_loss": 0.4596422016620636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44875067472457886, "epoch": 7.64, "learning_rate": 1.3111674650136189e-05, "loss": 0.5394, "step": 9038, "task_loss": 1.017516851425171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3861045241355896, "epoch": 7.64, "learning_rate": 1.3106978491593877e-05, "loss": 0.6957, "step": 9039, "task_loss": 0.6316030621528625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5494891405105591, "epoch": 7.64, "learning_rate": 1.3102282333051563e-05, "loss": 0.5566, "step": 9040, "task_loss": 0.758734405040741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5578837394714355, "epoch": 7.64, "learning_rate": 1.3097586174509251e-05, "loss": 0.6477, "step": 9041, "task_loss": 0.7325351238250732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.935663104057312, "epoch": 7.64, "learning_rate": 1.3092890015966941e-05, "loss": 0.8376, "step": 9042, "task_loss": 0.9812631011009216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3756678104400635, "epoch": 7.64, "learning_rate": 1.3088193857424627e-05, "loss": 0.5443, "step": 9043, "task_loss": 0.856468915939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5808850526809692, "epoch": 7.64, "learning_rate": 1.3083497698882315e-05, "loss": 0.5123, "step": 9044, "task_loss": 1.6954728364944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.074221134185791, "epoch": 7.65, "learning_rate": 1.3078801540340002e-05, "loss": 0.7508, "step": 9045, "task_loss": 1.6013044118881226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7039861679077148, "epoch": 7.65, "learning_rate": 1.3074105381797691e-05, "loss": 0.7932, "step": 9046, "task_loss": 1.1400704383850098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46911051869392395, "epoch": 7.65, "learning_rate": 1.3069409223255378e-05, "loss": 0.5853, "step": 9047, "task_loss": 0.46764498949050903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5258631110191345, "epoch": 7.65, "learning_rate": 1.3064713064713066e-05, "loss": 0.5395, "step": 9048, "task_loss": 1.0783113241195679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.934459388256073, "epoch": 7.65, "learning_rate": 1.3060016906170752e-05, "loss": 0.6228, "step": 9049, "task_loss": 0.6975300908088684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5057191848754883, "epoch": 7.65, "learning_rate": 1.3055320747628442e-05, "loss": 0.5108, "step": 9050, "task_loss": 0.12915122509002686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5388270616531372, "epoch": 7.65, "learning_rate": 1.3050624589086126e-05, "loss": 0.5369, "step": 9051, "task_loss": 0.5442620515823364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3529528081417084, "epoch": 7.65, "learning_rate": 1.3045928430543816e-05, "loss": 0.5094, "step": 9052, "task_loss": 0.08761846274137497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5761469602584839, "epoch": 7.65, "learning_rate": 1.3041232272001503e-05, "loss": 0.4688, "step": 9053, "task_loss": 0.4533674716949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6292692422866821, "epoch": 7.65, "learning_rate": 1.303653611345919e-05, "loss": 0.6021, "step": 9054, "task_loss": 1.0403292179107666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42903199791908264, "epoch": 7.65, "learning_rate": 1.3031839954916877e-05, "loss": 0.5572, "step": 9055, "task_loss": 0.5404348969459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7073831558227539, "epoch": 7.65, "learning_rate": 1.3027143796374567e-05, "loss": 0.519, "step": 9056, "task_loss": 0.9190866947174072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9583399295806885, "epoch": 7.66, "learning_rate": 1.3022447637832255e-05, "loss": 0.6038, "step": 9057, "task_loss": 0.9757304787635803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7077600955963135, "epoch": 7.66, "learning_rate": 1.3017751479289941e-05, "loss": 0.5816, "step": 9058, "task_loss": 0.7349674701690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6411482095718384, "epoch": 7.66, "learning_rate": 1.301305532074763e-05, "loss": 0.5503, "step": 9059, "task_loss": 0.21526868641376495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7603738307952881, "epoch": 7.66, "learning_rate": 1.3008359162205317e-05, "loss": 0.6024, "step": 9060, "task_loss": 0.35224419832229614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5869576334953308, "epoch": 7.66, "learning_rate": 1.3003663003663005e-05, "loss": 0.6084, "step": 9061, "task_loss": 0.7327791452407837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5139350891113281, "epoch": 7.66, "learning_rate": 1.2998966845120691e-05, "loss": 0.4928, "step": 9062, "task_loss": 0.435776025056839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.761914849281311, "epoch": 7.66, "learning_rate": 1.299427068657838e-05, "loss": 0.7615, "step": 9063, "task_loss": 0.8009116053581238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7620334625244141, "epoch": 7.66, "learning_rate": 1.2989574528036066e-05, "loss": 0.5968, "step": 9064, "task_loss": 1.706348180770874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1319060325622559, "epoch": 7.66, "learning_rate": 1.2984878369493756e-05, "loss": 0.7022, "step": 9065, "task_loss": 0.986369788646698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27987271547317505, "epoch": 7.66, "learning_rate": 1.2980182210951442e-05, "loss": 0.547, "step": 9066, "task_loss": 0.6432353258132935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9190165400505066, "epoch": 7.66, "learning_rate": 1.297548605240913e-05, "loss": 0.7284, "step": 9067, "task_loss": 1.4373105764389038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9982471466064453, "epoch": 7.66, "learning_rate": 1.2970789893866816e-05, "loss": 0.7831, "step": 9068, "task_loss": 2.0393905639648438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8950343132019043, "epoch": 7.67, "learning_rate": 1.2966093735324506e-05, "loss": 0.6554, "step": 9069, "task_loss": 0.2895921766757965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8909500241279602, "epoch": 7.67, "learning_rate": 1.2961397576782192e-05, "loss": 0.6539, "step": 9070, "task_loss": 0.9787572622299194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49022552371025085, "epoch": 7.67, "learning_rate": 1.295670141823988e-05, "loss": 0.5488, "step": 9071, "task_loss": 1.1060538291931152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9418529272079468, "epoch": 7.67, "learning_rate": 1.295200525969757e-05, "loss": 0.6518, "step": 9072, "task_loss": 0.5860490202903748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.716158390045166, "epoch": 7.67, "learning_rate": 1.2947309101155255e-05, "loss": 0.7791, "step": 9073, "task_loss": 0.7168653607368469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7122385501861572, "epoch": 7.67, "learning_rate": 1.2942612942612944e-05, "loss": 0.7827, "step": 9074, "task_loss": 0.7226094603538513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39601820707321167, "epoch": 7.67, "learning_rate": 1.293791678407063e-05, "loss": 0.6399, "step": 9075, "task_loss": 0.7039865851402283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6128004193305969, "epoch": 7.67, "learning_rate": 1.2933220625528319e-05, "loss": 0.7994, "step": 9076, "task_loss": 0.81050705909729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6541882753372192, "epoch": 7.67, "learning_rate": 1.2928524466986005e-05, "loss": 0.8796, "step": 9077, "task_loss": 0.8754754066467285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5332605838775635, "epoch": 7.67, "learning_rate": 1.2923828308443695e-05, "loss": 0.6304, "step": 9078, "task_loss": 0.9724287390708923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2524012327194214, "epoch": 7.67, "learning_rate": 1.2919132149901381e-05, "loss": 0.9193, "step": 9079, "task_loss": 1.1661349534988403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3385523557662964, "epoch": 7.67, "learning_rate": 1.291443599135907e-05, "loss": 0.4314, "step": 9080, "task_loss": 0.23091065883636475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6401267647743225, "epoch": 7.68, "learning_rate": 1.2909739832816756e-05, "loss": 0.4992, "step": 9081, "task_loss": 0.7104038596153259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4838736653327942, "epoch": 7.68, "learning_rate": 1.2905043674274445e-05, "loss": 0.4127, "step": 9082, "task_loss": 0.1713932603597641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5863381624221802, "epoch": 7.68, "learning_rate": 1.290034751573213e-05, "loss": 0.6186, "step": 9083, "task_loss": 0.9290936589241028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5556739568710327, "epoch": 7.68, "learning_rate": 1.289565135718982e-05, "loss": 0.5669, "step": 9084, "task_loss": 1.348468542098999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5089474320411682, "epoch": 7.68, "learning_rate": 1.2890955198647506e-05, "loss": 0.6359, "step": 9085, "task_loss": 0.7915548086166382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5317763090133667, "epoch": 7.68, "learning_rate": 1.2886259040105194e-05, "loss": 0.6177, "step": 9086, "task_loss": 0.8904809951782227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9682566523551941, "epoch": 7.68, "learning_rate": 1.288156288156288e-05, "loss": 0.8449, "step": 9087, "task_loss": 0.6071322560310364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6054902076721191, "epoch": 7.68, "learning_rate": 1.287686672302057e-05, "loss": 0.7301, "step": 9088, "task_loss": 0.9057341814041138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41633352637290955, "epoch": 7.68, "learning_rate": 1.2872170564478258e-05, "loss": 0.4463, "step": 9089, "task_loss": 0.21457599103450775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3914380669593811, "epoch": 7.68, "learning_rate": 1.2867474405935945e-05, "loss": 0.4383, "step": 9090, "task_loss": 0.210641548037529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3960869312286377, "epoch": 7.68, "learning_rate": 1.2862778247393634e-05, "loss": 0.5456, "step": 9091, "task_loss": 1.0260602235794067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4361596703529358, "epoch": 7.69, "learning_rate": 1.285808208885132e-05, "loss": 0.4661, "step": 9092, "task_loss": 0.66728675365448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9464641809463501, "epoch": 7.69, "learning_rate": 1.2853385930309009e-05, "loss": 0.7873, "step": 9093, "task_loss": 1.7065085172653198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5799727439880371, "epoch": 7.69, "learning_rate": 1.2848689771766695e-05, "loss": 0.4878, "step": 9094, "task_loss": 0.4960353672504425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5303642749786377, "epoch": 7.69, "learning_rate": 1.2843993613224383e-05, "loss": 0.6481, "step": 9095, "task_loss": 0.24278312921524048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5219902992248535, "epoch": 7.69, "learning_rate": 1.283929745468207e-05, "loss": 0.6134, "step": 9096, "task_loss": 0.07996684312820435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6477904915809631, "epoch": 7.69, "learning_rate": 1.2834601296139759e-05, "loss": 0.7066, "step": 9097, "task_loss": 0.6525054574012756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.584125816822052, "epoch": 7.69, "learning_rate": 1.2829905137597445e-05, "loss": 0.7153, "step": 9098, "task_loss": 1.163608193397522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5698292255401611, "epoch": 7.69, "learning_rate": 1.2825208979055133e-05, "loss": 0.7112, "step": 9099, "task_loss": 0.21385514736175537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46253418922424316, "epoch": 7.69, "learning_rate": 1.282051282051282e-05, "loss": 0.5185, "step": 9100, "task_loss": 0.5286110639572144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5986952781677246, "epoch": 7.69, "learning_rate": 1.281581666197051e-05, "loss": 0.5516, "step": 9101, "task_loss": 0.3507542014122009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.384599506855011, "epoch": 7.69, "learning_rate": 1.2811120503428194e-05, "loss": 0.6357, "step": 9102, "task_loss": 0.2553195655345917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9603703022003174, "epoch": 7.69, "learning_rate": 1.2806424344885884e-05, "loss": 0.677, "step": 9103, "task_loss": 1.233460545539856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3742559552192688, "epoch": 7.7, "learning_rate": 1.2801728186343574e-05, "loss": 0.5709, "step": 9104, "task_loss": 1.3207581043243408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5030856728553772, "epoch": 7.7, "learning_rate": 1.2797032027801258e-05, "loss": 0.4438, "step": 9105, "task_loss": 0.2892622649669647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3378325402736664, "epoch": 7.7, "learning_rate": 1.2792335869258948e-05, "loss": 0.4826, "step": 9106, "task_loss": 0.6476576328277588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.779072642326355, "epoch": 7.7, "learning_rate": 1.2787639710716634e-05, "loss": 0.7122, "step": 9107, "task_loss": 0.443534255027771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7453258037567139, "epoch": 7.7, "learning_rate": 1.2782943552174322e-05, "loss": 0.5403, "step": 9108, "task_loss": 0.9981032609939575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6538985967636108, "epoch": 7.7, "learning_rate": 1.2778247393632009e-05, "loss": 0.5881, "step": 9109, "task_loss": 0.4187701940536499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7382543087005615, "epoch": 7.7, "learning_rate": 1.2773551235089698e-05, "loss": 0.7091, "step": 9110, "task_loss": 0.9419711828231812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3574720621109009, "epoch": 7.7, "learning_rate": 1.2768855076547385e-05, "loss": 0.6426, "step": 9111, "task_loss": 0.8242260813713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6692391633987427, "epoch": 7.7, "learning_rate": 1.2764158918005073e-05, "loss": 0.5151, "step": 9112, "task_loss": 0.5239729285240173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30751579999923706, "epoch": 7.7, "learning_rate": 1.275946275946276e-05, "loss": 0.3269, "step": 9113, "task_loss": 0.6043019890785217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39202070236206055, "epoch": 7.7, "learning_rate": 1.2754766600920449e-05, "loss": 0.6135, "step": 9114, "task_loss": 0.8618087768554688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4494565427303314, "epoch": 7.7, "learning_rate": 1.2750070442378134e-05, "loss": 0.5528, "step": 9115, "task_loss": 0.9833083748817444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40449416637420654, "epoch": 7.71, "learning_rate": 1.2745374283835823e-05, "loss": 0.5695, "step": 9116, "task_loss": 0.3218323290348053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44142526388168335, "epoch": 7.71, "learning_rate": 1.274067812529351e-05, "loss": 0.4423, "step": 9117, "task_loss": 0.4833245873451233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4194791316986084, "epoch": 7.71, "learning_rate": 1.2735981966751198e-05, "loss": 0.5912, "step": 9118, "task_loss": 0.8127852082252502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.603406548500061, "epoch": 7.71, "learning_rate": 1.2731285808208887e-05, "loss": 0.6337, "step": 9119, "task_loss": 0.3046627342700958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0755479335784912, "epoch": 7.71, "learning_rate": 1.2726589649666574e-05, "loss": 0.6988, "step": 9120, "task_loss": 0.9110090732574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7281737327575684, "epoch": 7.71, "learning_rate": 1.2721893491124262e-05, "loss": 0.592, "step": 9121, "task_loss": 1.8244569301605225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6898918151855469, "epoch": 7.71, "learning_rate": 1.2717197332581948e-05, "loss": 0.5982, "step": 9122, "task_loss": 1.0618246793746948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41924935579299927, "epoch": 7.71, "learning_rate": 1.2712501174039638e-05, "loss": 0.6458, "step": 9123, "task_loss": 1.0223493576049805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6569982767105103, "epoch": 7.71, "learning_rate": 1.2707805015497322e-05, "loss": 0.621, "step": 9124, "task_loss": 1.6886991262435913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20973990857601166, "epoch": 7.71, "learning_rate": 1.2703108856955012e-05, "loss": 0.5426, "step": 9125, "task_loss": 0.050708234310150146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4269612431526184, "epoch": 7.71, "learning_rate": 1.2698412698412699e-05, "loss": 0.4521, "step": 9126, "task_loss": 0.41500040888786316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7017751336097717, "epoch": 7.71, "learning_rate": 1.2693716539870387e-05, "loss": 0.6118, "step": 9127, "task_loss": 0.9715023040771484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5537087917327881, "epoch": 7.72, "learning_rate": 1.2689020381328073e-05, "loss": 0.7616, "step": 9128, "task_loss": 1.0988810062408447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4880543351173401, "epoch": 7.72, "learning_rate": 1.2684324222785763e-05, "loss": 0.5919, "step": 9129, "task_loss": 1.2681373357772827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41118574142456055, "epoch": 7.72, "learning_rate": 1.2679628064243449e-05, "loss": 0.5772, "step": 9130, "task_loss": 0.7922279834747314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47621798515319824, "epoch": 7.72, "learning_rate": 1.2674931905701137e-05, "loss": 0.6616, "step": 9131, "task_loss": 0.3222748637199402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.914586067199707, "epoch": 7.72, "learning_rate": 1.2670235747158823e-05, "loss": 0.7261, "step": 9132, "task_loss": 0.4540279805660248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.17343029379844666, "epoch": 7.72, "learning_rate": 1.2665539588616513e-05, "loss": 0.4178, "step": 9133, "task_loss": 0.0189803596585989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4988674819469452, "epoch": 7.72, "learning_rate": 1.2660843430074201e-05, "loss": 0.8024, "step": 9134, "task_loss": 1.0021387338638306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5167523622512817, "epoch": 7.72, "learning_rate": 1.2656147271531887e-05, "loss": 0.5877, "step": 9135, "task_loss": 0.45380404591560364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5501084327697754, "epoch": 7.72, "learning_rate": 1.2651451112989575e-05, "loss": 0.5061, "step": 9136, "task_loss": 1.2432080507278442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36783260107040405, "epoch": 7.72, "learning_rate": 1.2646754954447262e-05, "loss": 0.4908, "step": 9137, "task_loss": 0.22679972648620605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6445311903953552, "epoch": 7.72, "learning_rate": 1.2642058795904952e-05, "loss": 0.6073, "step": 9138, "task_loss": 0.4383801519870758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5591990947723389, "epoch": 7.72, "learning_rate": 1.2637362637362638e-05, "loss": 0.8892, "step": 9139, "task_loss": 1.142134666442871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.698106586933136, "epoch": 7.73, "learning_rate": 1.2632666478820326e-05, "loss": 0.688, "step": 9140, "task_loss": 1.1024712324142456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5605455040931702, "epoch": 7.73, "learning_rate": 1.2627970320278012e-05, "loss": 0.5679, "step": 9141, "task_loss": 0.4387340247631073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5434353351593018, "epoch": 7.73, "learning_rate": 1.2623274161735702e-05, "loss": 0.5506, "step": 9142, "task_loss": 1.2082550525665283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6965538263320923, "epoch": 7.73, "learning_rate": 1.2618578003193388e-05, "loss": 0.7547, "step": 9143, "task_loss": 1.266932487487793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3385482728481293, "epoch": 7.73, "learning_rate": 1.2613881844651076e-05, "loss": 0.4992, "step": 9144, "task_loss": 0.7207286953926086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7837297320365906, "epoch": 7.73, "learning_rate": 1.2609185686108763e-05, "loss": 0.6465, "step": 9145, "task_loss": 1.0924104452133179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2767721116542816, "epoch": 7.73, "learning_rate": 1.260448952756645e-05, "loss": 0.5179, "step": 9146, "task_loss": 0.16237977147102356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43798738718032837, "epoch": 7.73, "learning_rate": 1.2599793369024137e-05, "loss": 0.4801, "step": 9147, "task_loss": 0.3549467921257019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35968178510665894, "epoch": 7.73, "learning_rate": 1.2595097210481827e-05, "loss": 0.4939, "step": 9148, "task_loss": 1.0481843948364258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4890681803226471, "epoch": 7.73, "learning_rate": 1.2590401051939515e-05, "loss": 0.7234, "step": 9149, "task_loss": 0.5798113346099854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.504951000213623, "epoch": 7.73, "learning_rate": 1.2585704893397201e-05, "loss": 0.7017, "step": 9150, "task_loss": 1.2618829011917114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7649238109588623, "epoch": 7.73, "learning_rate": 1.2581008734854891e-05, "loss": 0.7626, "step": 9151, "task_loss": 1.3594021797180176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7035589218139648, "epoch": 7.74, "learning_rate": 1.2576312576312577e-05, "loss": 0.5397, "step": 9152, "task_loss": 1.0005548000335693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49800413846969604, "epoch": 7.74, "learning_rate": 1.2571616417770265e-05, "loss": 0.5995, "step": 9153, "task_loss": 0.398371160030365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5780568718910217, "epoch": 7.74, "learning_rate": 1.2566920259227952e-05, "loss": 0.603, "step": 9154, "task_loss": 0.7580156922340393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3841339349746704, "epoch": 7.74, "learning_rate": 1.2562224100685641e-05, "loss": 0.6803, "step": 9155, "task_loss": 0.44215860962867737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48779815435409546, "epoch": 7.74, "learning_rate": 1.2557527942143326e-05, "loss": 0.5395, "step": 9156, "task_loss": 0.5779473185539246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5328159332275391, "epoch": 7.74, "learning_rate": 1.2552831783601016e-05, "loss": 0.4845, "step": 9157, "task_loss": 1.5250518321990967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5204998254776001, "epoch": 7.74, "learning_rate": 1.2548135625058702e-05, "loss": 0.5556, "step": 9158, "task_loss": 1.3276736736297607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8015720844268799, "epoch": 7.74, "learning_rate": 1.254343946651639e-05, "loss": 0.6488, "step": 9159, "task_loss": 0.7214223742485046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4204719364643097, "epoch": 7.74, "learning_rate": 1.2538743307974076e-05, "loss": 0.5335, "step": 9160, "task_loss": 0.37858372926712036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47613096237182617, "epoch": 7.74, "learning_rate": 1.2534047149431766e-05, "loss": 0.5842, "step": 9161, "task_loss": 0.9390247464179993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9023482799530029, "epoch": 7.74, "learning_rate": 1.2529350990889453e-05, "loss": 0.6688, "step": 9162, "task_loss": 2.1215975284576416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4858042299747467, "epoch": 7.75, "learning_rate": 1.252465483234714e-05, "loss": 0.4759, "step": 9163, "task_loss": 0.3425959646701813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9150476455688477, "epoch": 7.75, "learning_rate": 1.2519958673804827e-05, "loss": 0.6717, "step": 9164, "task_loss": 1.1654555797576904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6656334400177002, "epoch": 7.75, "learning_rate": 1.2515262515262517e-05, "loss": 0.6427, "step": 9165, "task_loss": 0.6464642286300659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3988196849822998, "epoch": 7.75, "learning_rate": 1.2510566356720205e-05, "loss": 0.4966, "step": 9166, "task_loss": 0.538947582244873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9869555234909058, "epoch": 7.75, "learning_rate": 1.2505870198177891e-05, "loss": 0.7167, "step": 9167, "task_loss": 1.5304919481277466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6806762218475342, "epoch": 7.75, "learning_rate": 1.2501174039635579e-05, "loss": 0.5933, "step": 9168, "task_loss": 0.622896134853363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5139034390449524, "epoch": 7.75, "learning_rate": 1.2496477881093265e-05, "loss": 0.5384, "step": 9169, "task_loss": 0.46794429421424866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4906321167945862, "epoch": 7.75, "learning_rate": 1.2491781722550953e-05, "loss": 0.5317, "step": 9170, "task_loss": 0.6787499785423279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6076678037643433, "epoch": 7.75, "learning_rate": 1.2487085564008641e-05, "loss": 0.7351, "step": 9171, "task_loss": 0.9412274360656738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7542492151260376, "epoch": 7.75, "learning_rate": 1.2482389405466328e-05, "loss": 0.7301, "step": 9172, "task_loss": 0.33520692586898804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6805561780929565, "epoch": 7.75, "learning_rate": 1.2477693246924017e-05, "loss": 0.6231, "step": 9173, "task_loss": 0.32480013370513916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8378483057022095, "epoch": 7.75, "learning_rate": 1.2472997088381706e-05, "loss": 0.7488, "step": 9174, "task_loss": 0.8898438811302185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6112545132637024, "epoch": 7.76, "learning_rate": 1.2468300929839392e-05, "loss": 0.5865, "step": 9175, "task_loss": 0.7700486779212952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.639783501625061, "epoch": 7.76, "learning_rate": 1.246360477129708e-05, "loss": 0.606, "step": 9176, "task_loss": 0.2865011394023895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45608949661254883, "epoch": 7.76, "learning_rate": 1.2458908612754768e-05, "loss": 0.4659, "step": 9177, "task_loss": 1.2098056077957153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43934351205825806, "epoch": 7.76, "learning_rate": 1.2454212454212454e-05, "loss": 0.5512, "step": 9178, "task_loss": 0.6544497609138489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5111106634140015, "epoch": 7.76, "learning_rate": 1.2449516295670142e-05, "loss": 0.7098, "step": 9179, "task_loss": 0.5310473442077637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7772449851036072, "epoch": 7.76, "learning_rate": 1.244482013712783e-05, "loss": 0.692, "step": 9180, "task_loss": 0.9565535187721252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49975037574768066, "epoch": 7.76, "learning_rate": 1.2440123978585518e-05, "loss": 0.6347, "step": 9181, "task_loss": 1.1027231216430664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35768193006515503, "epoch": 7.76, "learning_rate": 1.2435427820043205e-05, "loss": 0.5501, "step": 9182, "task_loss": 0.22229939699172974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5410670638084412, "epoch": 7.76, "learning_rate": 1.2430731661500893e-05, "loss": 0.7796, "step": 9183, "task_loss": 0.3469536006450653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5685247778892517, "epoch": 7.76, "learning_rate": 1.242603550295858e-05, "loss": 0.5611, "step": 9184, "task_loss": 1.103642463684082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.347300261259079, "epoch": 7.76, "learning_rate": 1.2421339344416267e-05, "loss": 0.6601, "step": 9185, "task_loss": 0.19665087759494781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5155866146087646, "epoch": 7.76, "learning_rate": 1.2416643185873955e-05, "loss": 0.5735, "step": 9186, "task_loss": 1.4749654531478882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5023394823074341, "epoch": 7.77, "learning_rate": 1.2411947027331643e-05, "loss": 0.5986, "step": 9187, "task_loss": 0.15497104823589325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4750116765499115, "epoch": 7.77, "learning_rate": 1.2407250868789331e-05, "loss": 0.4847, "step": 9188, "task_loss": 0.3762258291244507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5632235407829285, "epoch": 7.77, "learning_rate": 1.240255471024702e-05, "loss": 0.5459, "step": 9189, "task_loss": 1.1821222305297852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49339884519577026, "epoch": 7.77, "learning_rate": 1.2397858551704707e-05, "loss": 0.564, "step": 9190, "task_loss": 0.677377462387085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.044076681137085, "epoch": 7.77, "learning_rate": 1.2393162393162394e-05, "loss": 0.6389, "step": 9191, "task_loss": 1.2182095050811768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34849095344543457, "epoch": 7.77, "learning_rate": 1.2388466234620082e-05, "loss": 0.5521, "step": 9192, "task_loss": 0.6974703073501587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7654725313186646, "epoch": 7.77, "learning_rate": 1.238377007607777e-05, "loss": 0.5853, "step": 9193, "task_loss": 1.2249311208724976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6387283802032471, "epoch": 7.77, "learning_rate": 1.2379073917535456e-05, "loss": 0.595, "step": 9194, "task_loss": 1.379571795463562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41855818033218384, "epoch": 7.77, "learning_rate": 1.2374377758993144e-05, "loss": 0.5867, "step": 9195, "task_loss": 0.10525903105735779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35606756806373596, "epoch": 7.77, "learning_rate": 1.2369681600450832e-05, "loss": 0.6598, "step": 9196, "task_loss": 0.35298478603363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.78181391954422, "epoch": 7.77, "learning_rate": 1.2364985441908518e-05, "loss": 0.657, "step": 9197, "task_loss": 1.3825501203536987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.586603581905365, "epoch": 7.77, "learning_rate": 1.2360289283366206e-05, "loss": 0.5486, "step": 9198, "task_loss": 0.6161037087440491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5804572105407715, "epoch": 7.78, "learning_rate": 1.2355593124823895e-05, "loss": 0.5367, "step": 9199, "task_loss": 0.8979007005691528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5481369495391846, "epoch": 7.78, "learning_rate": 1.2350896966281583e-05, "loss": 0.6065, "step": 9200, "task_loss": 1.4870136976242065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6331534385681152, "epoch": 7.78, "learning_rate": 1.2346200807739269e-05, "loss": 0.7141, "step": 9201, "task_loss": 1.0938572883605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7696507573127747, "epoch": 7.78, "learning_rate": 1.2341504649196957e-05, "loss": 0.6515, "step": 9202, "task_loss": 0.8475361466407776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.505730926990509, "epoch": 7.78, "learning_rate": 1.2336808490654645e-05, "loss": 0.5447, "step": 9203, "task_loss": 1.472287893295288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4598817229270935, "epoch": 7.78, "learning_rate": 1.2332112332112333e-05, "loss": 0.5621, "step": 9204, "task_loss": 0.7953306436538696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6031165719032288, "epoch": 7.78, "learning_rate": 1.2327416173570021e-05, "loss": 0.5654, "step": 9205, "task_loss": 0.42854011058807373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6213238835334778, "epoch": 7.78, "learning_rate": 1.2322720015027709e-05, "loss": 0.6743, "step": 9206, "task_loss": 0.417527973651886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4177520275115967, "epoch": 7.78, "learning_rate": 1.2318023856485395e-05, "loss": 0.5231, "step": 9207, "task_loss": 0.5193702578544617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5149577260017395, "epoch": 7.78, "learning_rate": 1.2313327697943083e-05, "loss": 0.532, "step": 9208, "task_loss": 0.4460641145706177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6404176950454712, "epoch": 7.78, "learning_rate": 1.2308631539400771e-05, "loss": 0.6925, "step": 9209, "task_loss": 0.2899162471294403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.337053507566452, "epoch": 7.78, "learning_rate": 1.2303935380858458e-05, "loss": 0.6616, "step": 9210, "task_loss": 1.261913537979126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5292542576789856, "epoch": 7.79, "learning_rate": 1.2299239222316146e-05, "loss": 0.5465, "step": 9211, "task_loss": 0.67164146900177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5491082668304443, "epoch": 7.79, "learning_rate": 1.2294543063773834e-05, "loss": 0.6807, "step": 9212, "task_loss": 1.5212477445602417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4064103662967682, "epoch": 7.79, "learning_rate": 1.228984690523152e-05, "loss": 0.5185, "step": 9213, "task_loss": 0.5700362324714661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3824954032897949, "epoch": 7.79, "learning_rate": 1.2285150746689208e-05, "loss": 0.6112, "step": 9214, "task_loss": 0.159159317612648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4862583577632904, "epoch": 7.79, "learning_rate": 1.2280454588146896e-05, "loss": 0.5443, "step": 9215, "task_loss": 0.8809894323348999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6186376810073853, "epoch": 7.79, "learning_rate": 1.2275758429604584e-05, "loss": 0.6747, "step": 9216, "task_loss": 0.621317982673645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3610045611858368, "epoch": 7.79, "learning_rate": 1.227106227106227e-05, "loss": 0.5721, "step": 9217, "task_loss": 0.9602952003479004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7254411578178406, "epoch": 7.79, "learning_rate": 1.2266366112519959e-05, "loss": 0.5311, "step": 9218, "task_loss": 1.5485200881958008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48792755603790283, "epoch": 7.79, "learning_rate": 1.2261669953977647e-05, "loss": 0.6732, "step": 9219, "task_loss": 0.5640134215354919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44808119535446167, "epoch": 7.79, "learning_rate": 1.2256973795435335e-05, "loss": 0.5602, "step": 9220, "task_loss": 0.6537078619003296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7320519685745239, "epoch": 7.79, "learning_rate": 1.2252277636893023e-05, "loss": 0.7536, "step": 9221, "task_loss": 0.39484989643096924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4544448256492615, "epoch": 7.79, "learning_rate": 1.224758147835071e-05, "loss": 0.6433, "step": 9222, "task_loss": 0.5393927097320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6021705865859985, "epoch": 7.8, "learning_rate": 1.2242885319808397e-05, "loss": 0.5589, "step": 9223, "task_loss": 0.7008814811706543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4422619938850403, "epoch": 7.8, "learning_rate": 1.2238189161266085e-05, "loss": 0.525, "step": 9224, "task_loss": 0.698296070098877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35933879017829895, "epoch": 7.8, "learning_rate": 1.2233493002723773e-05, "loss": 0.6125, "step": 9225, "task_loss": 0.7790676355361938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6248863935470581, "epoch": 7.8, "learning_rate": 1.222879684418146e-05, "loss": 0.7916, "step": 9226, "task_loss": 1.2229695320129395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7148008346557617, "epoch": 7.8, "learning_rate": 1.2224100685639148e-05, "loss": 0.5667, "step": 9227, "task_loss": 1.144397258758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3828563988208771, "epoch": 7.8, "learning_rate": 1.2219404527096836e-05, "loss": 0.4809, "step": 9228, "task_loss": 0.1446860432624817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0114279985427856, "epoch": 7.8, "learning_rate": 1.2214708368554522e-05, "loss": 0.6302, "step": 9229, "task_loss": 0.6793627738952637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40103140473365784, "epoch": 7.8, "learning_rate": 1.221001221001221e-05, "loss": 0.5728, "step": 9230, "task_loss": 0.9837297797203064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.531965970993042, "epoch": 7.8, "learning_rate": 1.2205316051469898e-05, "loss": 0.4819, "step": 9231, "task_loss": 0.5519455075263977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5232385396957397, "epoch": 7.8, "learning_rate": 1.2200619892927586e-05, "loss": 0.4742, "step": 9232, "task_loss": 0.5760833621025085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7571118474006653, "epoch": 7.8, "learning_rate": 1.2195923734385272e-05, "loss": 0.5627, "step": 9233, "task_loss": 0.9252549409866333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6736440062522888, "epoch": 7.81, "learning_rate": 1.2191227575842962e-05, "loss": 0.5676, "step": 9234, "task_loss": 0.6488844156265259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.680043637752533, "epoch": 7.81, "learning_rate": 1.2186531417300648e-05, "loss": 0.6641, "step": 9235, "task_loss": 0.700019896030426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5593346357345581, "epoch": 7.81, "learning_rate": 1.2181835258758337e-05, "loss": 0.5739, "step": 9236, "task_loss": 0.8230302929878235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.725781261920929, "epoch": 7.81, "learning_rate": 1.2177139100216025e-05, "loss": 0.5251, "step": 9237, "task_loss": 1.1301664113998413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9725197553634644, "epoch": 7.81, "learning_rate": 1.2172442941673713e-05, "loss": 0.6308, "step": 9238, "task_loss": 0.5464795827865601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6658626794815063, "epoch": 7.81, "learning_rate": 1.2167746783131399e-05, "loss": 0.4178, "step": 9239, "task_loss": 0.7836083769798279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44252678751945496, "epoch": 7.81, "learning_rate": 1.2163050624589087e-05, "loss": 0.7339, "step": 9240, "task_loss": 0.1419605016708374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5563592910766602, "epoch": 7.81, "learning_rate": 1.2158354466046775e-05, "loss": 0.5561, "step": 9241, "task_loss": 0.6091758608818054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.554627537727356, "epoch": 7.81, "learning_rate": 1.2153658307504461e-05, "loss": 0.5197, "step": 9242, "task_loss": 0.628325879573822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4820675253868103, "epoch": 7.81, "learning_rate": 1.214896214896215e-05, "loss": 0.5898, "step": 9243, "task_loss": 0.5412018895149231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3770894408226013, "epoch": 7.81, "learning_rate": 1.2144265990419837e-05, "loss": 0.3667, "step": 9244, "task_loss": 0.8983127474784851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5398417711257935, "epoch": 7.81, "learning_rate": 1.2139569831877524e-05, "loss": 0.6733, "step": 9245, "task_loss": 1.429343819618225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39328181743621826, "epoch": 7.82, "learning_rate": 1.2134873673335212e-05, "loss": 0.6093, "step": 9246, "task_loss": 0.30315101146698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.501168966293335, "epoch": 7.82, "learning_rate": 1.21301775147929e-05, "loss": 0.651, "step": 9247, "task_loss": 1.0697669982910156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7110928893089294, "epoch": 7.82, "learning_rate": 1.2125481356250588e-05, "loss": 0.664, "step": 9248, "task_loss": 1.3785151243209839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.669590175151825, "epoch": 7.82, "learning_rate": 1.2120785197708274e-05, "loss": 0.5392, "step": 9249, "task_loss": 0.8829492926597595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49665331840515137, "epoch": 7.82, "learning_rate": 1.2116089039165964e-05, "loss": 0.4477, "step": 9250, "task_loss": 0.5024304986000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8927820920944214, "epoch": 7.82, "learning_rate": 1.211139288062365e-05, "loss": 0.7073, "step": 9251, "task_loss": 1.3486604690551758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33343306183815, "epoch": 7.82, "learning_rate": 1.2106696722081338e-05, "loss": 0.5713, "step": 9252, "task_loss": 1.3845577239990234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.525398313999176, "epoch": 7.82, "learning_rate": 1.2102000563539026e-05, "loss": 0.6371, "step": 9253, "task_loss": 0.390460342168808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.422050416469574, "epoch": 7.82, "learning_rate": 1.2097304404996714e-05, "loss": 0.7154, "step": 9254, "task_loss": 0.25287243723869324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.78952556848526, "epoch": 7.82, "learning_rate": 1.20926082464544e-05, "loss": 0.7335, "step": 9255, "task_loss": 1.0304908752441406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5443419218063354, "epoch": 7.82, "learning_rate": 1.2087912087912089e-05, "loss": 0.5969, "step": 9256, "task_loss": 0.7887260913848877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6322680711746216, "epoch": 7.82, "learning_rate": 1.2083215929369777e-05, "loss": 0.5048, "step": 9257, "task_loss": 0.8443447351455688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3738507628440857, "epoch": 7.83, "learning_rate": 1.2078519770827463e-05, "loss": 0.5274, "step": 9258, "task_loss": 0.4838450253009796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9827705025672913, "epoch": 7.83, "learning_rate": 1.2073823612285151e-05, "loss": 0.6867, "step": 9259, "task_loss": 1.1186875104904175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5147654414176941, "epoch": 7.83, "learning_rate": 1.206912745374284e-05, "loss": 0.5119, "step": 9260, "task_loss": 0.8690692782402039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7297931909561157, "epoch": 7.83, "learning_rate": 1.2064431295200525e-05, "loss": 0.5093, "step": 9261, "task_loss": 1.146477460861206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4656773805618286, "epoch": 7.83, "learning_rate": 1.2059735136658214e-05, "loss": 0.603, "step": 9262, "task_loss": 0.03806782513856888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6247949600219727, "epoch": 7.83, "learning_rate": 1.2055038978115902e-05, "loss": 0.4347, "step": 9263, "task_loss": 0.4409271478652954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5772960186004639, "epoch": 7.83, "learning_rate": 1.2050342819573588e-05, "loss": 0.5083, "step": 9264, "task_loss": 1.0159989595413208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7793696522712708, "epoch": 7.83, "learning_rate": 1.2045646661031278e-05, "loss": 0.6689, "step": 9265, "task_loss": 0.43511030077934265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5887829065322876, "epoch": 7.83, "learning_rate": 1.2040950502488966e-05, "loss": 0.6558, "step": 9266, "task_loss": 0.2553347647190094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8171536922454834, "epoch": 7.83, "learning_rate": 1.2036254343946652e-05, "loss": 0.643, "step": 9267, "task_loss": 1.4963891506195068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5391876697540283, "epoch": 7.83, "learning_rate": 1.203155818540434e-05, "loss": 0.6894, "step": 9268, "task_loss": 1.0888868570327759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3920934796333313, "epoch": 7.83, "learning_rate": 1.2026862026862028e-05, "loss": 0.5112, "step": 9269, "task_loss": 0.4113203287124634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4259289801120758, "epoch": 7.84, "learning_rate": 1.2022165868319714e-05, "loss": 0.6032, "step": 9270, "task_loss": 0.03226156160235405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.542033851146698, "epoch": 7.84, "learning_rate": 1.2017469709777402e-05, "loss": 0.6857, "step": 9271, "task_loss": 0.42736196517944336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6910910606384277, "epoch": 7.84, "learning_rate": 1.201277355123509e-05, "loss": 0.701, "step": 9272, "task_loss": 1.0228691101074219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3733825981616974, "epoch": 7.84, "learning_rate": 1.2008077392692779e-05, "loss": 0.5567, "step": 9273, "task_loss": 1.0164154767990112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34405550360679626, "epoch": 7.84, "learning_rate": 1.2003381234150465e-05, "loss": 0.4437, "step": 9274, "task_loss": 0.9981056451797485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8418645858764648, "epoch": 7.84, "learning_rate": 1.1998685075608153e-05, "loss": 0.5922, "step": 9275, "task_loss": 1.7119226455688477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4106428921222687, "epoch": 7.84, "learning_rate": 1.1993988917065841e-05, "loss": 0.6166, "step": 9276, "task_loss": 0.6535403728485107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43405449390411377, "epoch": 7.84, "learning_rate": 1.1989292758523527e-05, "loss": 0.5025, "step": 9277, "task_loss": 0.9019235968589783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4661434292793274, "epoch": 7.84, "learning_rate": 1.1984596599981215e-05, "loss": 0.5699, "step": 9278, "task_loss": 0.6574129462242126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2712264955043793, "epoch": 7.84, "learning_rate": 1.1979900441438903e-05, "loss": 0.5174, "step": 9279, "task_loss": 0.14479851722717285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37713897228240967, "epoch": 7.84, "learning_rate": 1.1975204282896591e-05, "loss": 0.6373, "step": 9280, "task_loss": 0.32519257068634033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37606072425842285, "epoch": 7.84, "learning_rate": 1.197050812435428e-05, "loss": 0.4848, "step": 9281, "task_loss": 0.8021999001502991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47114115953445435, "epoch": 7.85, "learning_rate": 1.1965811965811967e-05, "loss": 0.6278, "step": 9282, "task_loss": 0.4739890396595001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39906805753707886, "epoch": 7.85, "learning_rate": 1.1961115807269654e-05, "loss": 0.4275, "step": 9283, "task_loss": 1.4123624563217163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6528750061988831, "epoch": 7.85, "learning_rate": 1.1956419648727342e-05, "loss": 0.5853, "step": 9284, "task_loss": 0.5686354637145996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47922807931900024, "epoch": 7.85, "learning_rate": 1.195172349018503e-05, "loss": 0.6672, "step": 9285, "task_loss": 0.7791629433631897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5588380098342896, "epoch": 7.85, "learning_rate": 1.1947027331642716e-05, "loss": 0.6362, "step": 9286, "task_loss": 0.4977732002735138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44100531935691833, "epoch": 7.85, "learning_rate": 1.1942331173100404e-05, "loss": 0.528, "step": 9287, "task_loss": 0.5409245491027832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5876507759094238, "epoch": 7.85, "learning_rate": 1.1937635014558092e-05, "loss": 0.5226, "step": 9288, "task_loss": 1.4153391122817993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8199916481971741, "epoch": 7.85, "learning_rate": 1.193293885601578e-05, "loss": 0.6768, "step": 9289, "task_loss": 0.7387357354164124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5163699388504028, "epoch": 7.85, "learning_rate": 1.1928242697473467e-05, "loss": 0.5078, "step": 9290, "task_loss": 0.9535502791404724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43480703234672546, "epoch": 7.85, "learning_rate": 1.1923546538931155e-05, "loss": 0.4218, "step": 9291, "task_loss": 0.4559215307235718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8973122835159302, "epoch": 7.85, "learning_rate": 1.1918850380388843e-05, "loss": 0.7327, "step": 9292, "task_loss": 1.3155992031097412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6073883771896362, "epoch": 7.85, "learning_rate": 1.1914154221846529e-05, "loss": 0.5386, "step": 9293, "task_loss": 0.41464728116989136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44240957498550415, "epoch": 7.86, "learning_rate": 1.1909458063304217e-05, "loss": 0.5184, "step": 9294, "task_loss": 0.6909059882164001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3904566764831543, "epoch": 7.86, "learning_rate": 1.1904761904761905e-05, "loss": 0.4832, "step": 9295, "task_loss": 0.5978354811668396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5160146951675415, "epoch": 7.86, "learning_rate": 1.1900065746219593e-05, "loss": 0.4946, "step": 9296, "task_loss": 0.487170934677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6873438358306885, "epoch": 7.86, "learning_rate": 1.1895369587677281e-05, "loss": 0.8019, "step": 9297, "task_loss": 0.3708606958389282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8872530460357666, "epoch": 7.86, "learning_rate": 1.189067342913497e-05, "loss": 0.6762, "step": 9298, "task_loss": 0.594131350517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7674573659896851, "epoch": 7.86, "learning_rate": 1.1885977270592656e-05, "loss": 0.6091, "step": 9299, "task_loss": 1.1135389804840088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6771416664123535, "epoch": 7.86, "learning_rate": 1.1881281112050344e-05, "loss": 0.602, "step": 9300, "task_loss": 1.4254180192947388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4522908926010132, "epoch": 7.86, "learning_rate": 1.1876584953508032e-05, "loss": 0.5072, "step": 9301, "task_loss": 0.3965552747249603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40450942516326904, "epoch": 7.86, "learning_rate": 1.1871888794965718e-05, "loss": 0.5416, "step": 9302, "task_loss": 1.0282851457595825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.689955472946167, "epoch": 7.86, "learning_rate": 1.1867192636423406e-05, "loss": 0.5231, "step": 9303, "task_loss": 0.6908197999000549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47168460488319397, "epoch": 7.86, "learning_rate": 1.1862496477881094e-05, "loss": 0.5885, "step": 9304, "task_loss": 1.0658775568008423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5721685886383057, "epoch": 7.87, "learning_rate": 1.1857800319338782e-05, "loss": 0.6076, "step": 9305, "task_loss": 0.6171835064888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4201356768608093, "epoch": 7.87, "learning_rate": 1.1853104160796468e-05, "loss": 0.6086, "step": 9306, "task_loss": 0.48751622438430786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7833970785140991, "epoch": 7.87, "learning_rate": 1.1848408002254156e-05, "loss": 0.6819, "step": 9307, "task_loss": 0.9044036269187927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8697282075881958, "epoch": 7.87, "learning_rate": 1.1843711843711844e-05, "loss": 0.6304, "step": 9308, "task_loss": 0.6825411915779114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8512937426567078, "epoch": 7.87, "learning_rate": 1.183901568516953e-05, "loss": 0.8403, "step": 9309, "task_loss": 0.8805340528488159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46407240629196167, "epoch": 7.87, "learning_rate": 1.1834319526627219e-05, "loss": 0.6655, "step": 9310, "task_loss": 0.9773011803627014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5964052677154541, "epoch": 7.87, "learning_rate": 1.1829623368084909e-05, "loss": 0.7093, "step": 9311, "task_loss": 0.837425172328949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5961411595344543, "epoch": 7.87, "learning_rate": 1.1824927209542595e-05, "loss": 0.8123, "step": 9312, "task_loss": 0.916536271572113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3964337110519409, "epoch": 7.87, "learning_rate": 1.1820231051000283e-05, "loss": 0.5594, "step": 9313, "task_loss": 0.9332810640335083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3785072863101959, "epoch": 7.87, "learning_rate": 1.1815534892457971e-05, "loss": 0.6746, "step": 9314, "task_loss": 0.6761341691017151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4117019772529602, "epoch": 7.87, "learning_rate": 1.1810838733915657e-05, "loss": 0.6267, "step": 9315, "task_loss": 0.9602721929550171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4055083692073822, "epoch": 7.87, "learning_rate": 1.1806142575373345e-05, "loss": 0.5866, "step": 9316, "task_loss": 0.3321363031864166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7163821458816528, "epoch": 7.88, "learning_rate": 1.1801446416831033e-05, "loss": 0.6841, "step": 9317, "task_loss": 1.2158845663070679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6238053441047668, "epoch": 7.88, "learning_rate": 1.179675025828872e-05, "loss": 0.5669, "step": 9318, "task_loss": 0.6698490381240845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6481142044067383, "epoch": 7.88, "learning_rate": 1.1792054099746408e-05, "loss": 0.6501, "step": 9319, "task_loss": 0.4525347054004669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3519462049007416, "epoch": 7.88, "learning_rate": 1.1787357941204096e-05, "loss": 0.5407, "step": 9320, "task_loss": 0.6376115679740906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7407633662223816, "epoch": 7.88, "learning_rate": 1.1782661782661784e-05, "loss": 0.639, "step": 9321, "task_loss": 0.6575656533241272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7002640962600708, "epoch": 7.88, "learning_rate": 1.177796562411947e-05, "loss": 0.6071, "step": 9322, "task_loss": 0.36492401361465454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26824867725372314, "epoch": 7.88, "learning_rate": 1.1773269465577158e-05, "loss": 0.5516, "step": 9323, "task_loss": 0.6223195791244507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35870224237442017, "epoch": 7.88, "learning_rate": 1.1768573307034846e-05, "loss": 0.4982, "step": 9324, "task_loss": 1.09247887134552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27173057198524475, "epoch": 7.88, "learning_rate": 1.1763877148492533e-05, "loss": 0.5433, "step": 9325, "task_loss": 1.0355149507522583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8483541011810303, "epoch": 7.88, "learning_rate": 1.175918098995022e-05, "loss": 0.7056, "step": 9326, "task_loss": 1.5064935684204102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6126683950424194, "epoch": 7.88, "learning_rate": 1.175448483140791e-05, "loss": 0.6427, "step": 9327, "task_loss": 0.46967265009880066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6771754026412964, "epoch": 7.88, "learning_rate": 1.1749788672865597e-05, "loss": 0.6561, "step": 9328, "task_loss": 1.4329633712768555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3508133888244629, "epoch": 7.89, "learning_rate": 1.1745092514323285e-05, "loss": 0.496, "step": 9329, "task_loss": 1.0206433534622192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44232216477394104, "epoch": 7.89, "learning_rate": 1.1740396355780973e-05, "loss": 0.6219, "step": 9330, "task_loss": 0.6223843693733215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7691107988357544, "epoch": 7.89, "learning_rate": 1.1735700197238659e-05, "loss": 0.5673, "step": 9331, "task_loss": 0.6682069301605225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6825751662254333, "epoch": 7.89, "learning_rate": 1.1731004038696347e-05, "loss": 0.8155, "step": 9332, "task_loss": 1.1338684558868408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34660395979881287, "epoch": 7.89, "learning_rate": 1.1726307880154035e-05, "loss": 0.4307, "step": 9333, "task_loss": 1.0058459043502808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5644561052322388, "epoch": 7.89, "learning_rate": 1.1721611721611721e-05, "loss": 0.6537, "step": 9334, "task_loss": 0.4792861044406891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3123962879180908, "epoch": 7.89, "learning_rate": 1.171691556306941e-05, "loss": 0.5907, "step": 9335, "task_loss": 0.618355393409729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9991593360900879, "epoch": 7.89, "learning_rate": 1.1712219404527098e-05, "loss": 0.7812, "step": 9336, "task_loss": 0.8375954627990723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.431776762008667, "epoch": 7.89, "learning_rate": 1.1707523245984786e-05, "loss": 0.5448, "step": 9337, "task_loss": 0.2983088195323944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45495033264160156, "epoch": 7.89, "learning_rate": 1.1702827087442472e-05, "loss": 0.658, "step": 9338, "task_loss": 1.0990065336227417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8666210174560547, "epoch": 7.89, "learning_rate": 1.169813092890016e-05, "loss": 0.7471, "step": 9339, "task_loss": 1.8134522438049316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33931833505630493, "epoch": 7.89, "learning_rate": 1.1693434770357848e-05, "loss": 0.4538, "step": 9340, "task_loss": 0.4526831805706024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47321176528930664, "epoch": 7.9, "learning_rate": 1.1688738611815534e-05, "loss": 0.5406, "step": 9341, "task_loss": 0.08297832310199738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3966718018054962, "epoch": 7.9, "learning_rate": 1.1684042453273224e-05, "loss": 0.5896, "step": 9342, "task_loss": 0.21571119129657745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.378793865442276, "epoch": 7.9, "learning_rate": 1.1679346294730912e-05, "loss": 0.5975, "step": 9343, "task_loss": 0.9568120837211609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5830681324005127, "epoch": 7.9, "learning_rate": 1.1674650136188598e-05, "loss": 0.6582, "step": 9344, "task_loss": 0.758614182472229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34848782420158386, "epoch": 7.9, "learning_rate": 1.1669953977646286e-05, "loss": 0.5605, "step": 9345, "task_loss": 0.0972442477941513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6855816841125488, "epoch": 7.9, "learning_rate": 1.1665257819103974e-05, "loss": 0.6362, "step": 9346, "task_loss": 1.2701539993286133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46639764308929443, "epoch": 7.9, "learning_rate": 1.166056166056166e-05, "loss": 0.6236, "step": 9347, "task_loss": 0.44675567746162415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4786595106124878, "epoch": 7.9, "learning_rate": 1.1655865502019349e-05, "loss": 0.5772, "step": 9348, "task_loss": 0.4717656672000885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7530034780502319, "epoch": 7.9, "learning_rate": 1.1651169343477037e-05, "loss": 0.6256, "step": 9349, "task_loss": 0.7843053340911865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2761452794075012, "epoch": 7.9, "learning_rate": 1.1646473184934723e-05, "loss": 0.6142, "step": 9350, "task_loss": 0.5805830359458923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3149012327194214, "epoch": 7.9, "learning_rate": 1.1641777026392411e-05, "loss": 0.4722, "step": 9351, "task_loss": 0.09974182397127151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49099135398864746, "epoch": 7.9, "learning_rate": 1.16370808678501e-05, "loss": 0.547, "step": 9352, "task_loss": 0.609134316444397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7721101641654968, "epoch": 7.91, "learning_rate": 1.1632384709307786e-05, "loss": 0.6288, "step": 9353, "task_loss": 0.6604005694389343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4929327070713043, "epoch": 7.91, "learning_rate": 1.1627688550765474e-05, "loss": 0.5689, "step": 9354, "task_loss": 1.0445213317871094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3435975909233093, "epoch": 7.91, "learning_rate": 1.1622992392223162e-05, "loss": 0.4873, "step": 9355, "task_loss": 0.43401697278022766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4867996871471405, "epoch": 7.91, "learning_rate": 1.161829623368085e-05, "loss": 0.6451, "step": 9356, "task_loss": 0.6361439228057861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5806735157966614, "epoch": 7.91, "learning_rate": 1.1613600075138538e-05, "loss": 0.5538, "step": 9357, "task_loss": 0.9950587153434753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6415983438491821, "epoch": 7.91, "learning_rate": 1.1608903916596226e-05, "loss": 0.6855, "step": 9358, "task_loss": 0.512606680393219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8412670493125916, "epoch": 7.91, "learning_rate": 1.1604207758053912e-05, "loss": 0.7737, "step": 9359, "task_loss": 0.18173006176948547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4495844841003418, "epoch": 7.91, "learning_rate": 1.15995115995116e-05, "loss": 0.4667, "step": 9360, "task_loss": 0.5402575135231018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7390329241752625, "epoch": 7.91, "learning_rate": 1.1594815440969288e-05, "loss": 0.7245, "step": 9361, "task_loss": 1.429505705833435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5782394409179688, "epoch": 7.91, "learning_rate": 1.1590119282426976e-05, "loss": 0.5727, "step": 9362, "task_loss": 0.514115035533905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42428654432296753, "epoch": 7.91, "learning_rate": 1.1585423123884663e-05, "loss": 0.5416, "step": 9363, "task_loss": 0.3844887316226959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4159300625324249, "epoch": 7.91, "learning_rate": 1.158072696534235e-05, "loss": 0.5917, "step": 9364, "task_loss": 0.19934602081775665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7411067485809326, "epoch": 7.92, "learning_rate": 1.1576030806800039e-05, "loss": 0.6047, "step": 9365, "task_loss": 0.784552812576294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0113531351089478, "epoch": 7.92, "learning_rate": 1.1571334648257725e-05, "loss": 0.4562, "step": 9366, "task_loss": 0.7706130146980286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6570383310317993, "epoch": 7.92, "learning_rate": 1.1566638489715413e-05, "loss": 0.5382, "step": 9367, "task_loss": 0.4137342870235443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4768487811088562, "epoch": 7.92, "learning_rate": 1.1561942331173101e-05, "loss": 0.7668, "step": 9368, "task_loss": 0.5935700535774231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5979450941085815, "epoch": 7.92, "learning_rate": 1.1557246172630787e-05, "loss": 0.59, "step": 9369, "task_loss": 0.9261991381645203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35876426100730896, "epoch": 7.92, "learning_rate": 1.1552550014088475e-05, "loss": 0.6173, "step": 9370, "task_loss": 0.3439047038555145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6717727780342102, "epoch": 7.92, "learning_rate": 1.1547853855546163e-05, "loss": 0.5886, "step": 9371, "task_loss": 1.1706637144088745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3255844712257385, "epoch": 7.92, "learning_rate": 1.1543157697003852e-05, "loss": 0.5891, "step": 9372, "task_loss": 0.4770594537258148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4329717755317688, "epoch": 7.92, "learning_rate": 1.153846153846154e-05, "loss": 0.5787, "step": 9373, "task_loss": 1.2607519626617432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6130465865135193, "epoch": 7.92, "learning_rate": 1.1533765379919228e-05, "loss": 0.5124, "step": 9374, "task_loss": 0.6120911836624146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5117390155792236, "epoch": 7.92, "learning_rate": 1.1529069221376914e-05, "loss": 0.7744, "step": 9375, "task_loss": 1.0089675188064575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6334860324859619, "epoch": 7.93, "learning_rate": 1.1524373062834602e-05, "loss": 0.6035, "step": 9376, "task_loss": 1.2854617834091187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5256725549697876, "epoch": 7.93, "learning_rate": 1.151967690429229e-05, "loss": 0.783, "step": 9377, "task_loss": 0.37418946623802185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.69774329662323, "epoch": 7.93, "learning_rate": 1.1514980745749978e-05, "loss": 0.6212, "step": 9378, "task_loss": 1.050431728363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5394479632377625, "epoch": 7.93, "learning_rate": 1.1510284587207664e-05, "loss": 0.5783, "step": 9379, "task_loss": 1.250901222229004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48475608229637146, "epoch": 7.93, "learning_rate": 1.1505588428665352e-05, "loss": 0.5517, "step": 9380, "task_loss": 0.9124853610992432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33954858779907227, "epoch": 7.93, "learning_rate": 1.150089227012304e-05, "loss": 0.4991, "step": 9381, "task_loss": 0.04125566780567169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33120328187942505, "epoch": 7.93, "learning_rate": 1.1496196111580727e-05, "loss": 0.4075, "step": 9382, "task_loss": 0.2102358490228653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9685516357421875, "epoch": 7.93, "learning_rate": 1.1491499953038415e-05, "loss": 0.7516, "step": 9383, "task_loss": 0.648388147354126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.527820348739624, "epoch": 7.93, "learning_rate": 1.1486803794496103e-05, "loss": 0.551, "step": 9384, "task_loss": 0.8378477692604065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35361695289611816, "epoch": 7.93, "learning_rate": 1.148210763595379e-05, "loss": 0.4626, "step": 9385, "task_loss": 0.7041658163070679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38424646854400635, "epoch": 7.93, "learning_rate": 1.1477411477411477e-05, "loss": 0.5422, "step": 9386, "task_loss": 1.1273832321166992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7163575887680054, "epoch": 7.93, "learning_rate": 1.1472715318869165e-05, "loss": 0.564, "step": 9387, "task_loss": 0.557891309261322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3816618323326111, "epoch": 7.94, "learning_rate": 1.1468019160326853e-05, "loss": 0.5523, "step": 9388, "task_loss": 1.1726878881454468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3462896943092346, "epoch": 7.94, "learning_rate": 1.1463323001784541e-05, "loss": 0.3341, "step": 9389, "task_loss": 0.5337329506874084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6958280205726624, "epoch": 7.94, "learning_rate": 1.145862684324223e-05, "loss": 0.5326, "step": 9390, "task_loss": 0.38178154826164246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5411871671676636, "epoch": 7.94, "learning_rate": 1.1453930684699916e-05, "loss": 0.5398, "step": 9391, "task_loss": 1.0458968877792358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41422969102859497, "epoch": 7.94, "learning_rate": 1.1449234526157604e-05, "loss": 0.6087, "step": 9392, "task_loss": 0.1508791148662567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5169859528541565, "epoch": 7.94, "learning_rate": 1.1444538367615292e-05, "loss": 0.5368, "step": 9393, "task_loss": 0.9140554070472717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24324582517147064, "epoch": 7.94, "learning_rate": 1.143984220907298e-05, "loss": 0.5831, "step": 9394, "task_loss": 0.014985423535108566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7815532684326172, "epoch": 7.94, "learning_rate": 1.1435146050530666e-05, "loss": 0.6721, "step": 9395, "task_loss": 1.4106112718582153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3381294012069702, "epoch": 7.94, "learning_rate": 1.1430449891988354e-05, "loss": 0.4961, "step": 9396, "task_loss": 1.0227247476577759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6971325874328613, "epoch": 7.94, "learning_rate": 1.1425753733446042e-05, "loss": 0.5836, "step": 9397, "task_loss": 0.647834300994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5957754850387573, "epoch": 7.94, "learning_rate": 1.1421057574903729e-05, "loss": 0.5272, "step": 9398, "task_loss": 2.4864888191223145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44528821110725403, "epoch": 7.94, "learning_rate": 1.1416361416361417e-05, "loss": 0.5829, "step": 9399, "task_loss": 0.3669188320636749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.545400083065033, "epoch": 7.95, "learning_rate": 1.1411665257819105e-05, "loss": 0.5479, "step": 9400, "task_loss": 0.8675976991653442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2651684284210205, "epoch": 7.95, "learning_rate": 1.1406969099276791e-05, "loss": 0.4637, "step": 9401, "task_loss": 0.13276241719722748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4663461744785309, "epoch": 7.95, "learning_rate": 1.1402272940734479e-05, "loss": 0.583, "step": 9402, "task_loss": 1.4454385042190552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5783052444458008, "epoch": 7.95, "learning_rate": 1.1397576782192167e-05, "loss": 0.7182, "step": 9403, "task_loss": 0.4205712080001831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5662481784820557, "epoch": 7.95, "learning_rate": 1.1392880623649855e-05, "loss": 0.6152, "step": 9404, "task_loss": 0.6159734129905701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.19339610636234283, "epoch": 7.95, "learning_rate": 1.1388184465107543e-05, "loss": 0.5176, "step": 9405, "task_loss": 0.39159074425697327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6262794137001038, "epoch": 7.95, "learning_rate": 1.1383488306565231e-05, "loss": 0.6609, "step": 9406, "task_loss": 0.647121250629425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41024795174598694, "epoch": 7.95, "learning_rate": 1.1378792148022917e-05, "loss": 0.5907, "step": 9407, "task_loss": 0.359412282705307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3848845362663269, "epoch": 7.95, "learning_rate": 1.1374095989480605e-05, "loss": 0.5126, "step": 9408, "task_loss": 0.23334404826164246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34076493978500366, "epoch": 7.95, "learning_rate": 1.1369399830938294e-05, "loss": 0.4897, "step": 9409, "task_loss": 0.397477388381958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6971105337142944, "epoch": 7.95, "learning_rate": 1.1364703672395982e-05, "loss": 0.7067, "step": 9410, "task_loss": 0.7745165228843689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6136131286621094, "epoch": 7.95, "learning_rate": 1.1360007513853668e-05, "loss": 0.4883, "step": 9411, "task_loss": 0.25786492228507996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.626451313495636, "epoch": 7.96, "learning_rate": 1.1355311355311356e-05, "loss": 0.6215, "step": 9412, "task_loss": 0.7057802081108093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29733797907829285, "epoch": 7.96, "learning_rate": 1.1350615196769044e-05, "loss": 0.4305, "step": 9413, "task_loss": 0.4450835585594177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.549385666847229, "epoch": 7.96, "learning_rate": 1.134591903822673e-05, "loss": 0.6516, "step": 9414, "task_loss": 0.3267466723918915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.447502076625824, "epoch": 7.96, "learning_rate": 1.1341222879684418e-05, "loss": 0.5668, "step": 9415, "task_loss": 1.4728764295578003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5129795074462891, "epoch": 7.96, "learning_rate": 1.1336526721142106e-05, "loss": 0.5342, "step": 9416, "task_loss": 0.37987199425697327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3828312158584595, "epoch": 7.96, "learning_rate": 1.1331830562599793e-05, "loss": 0.5412, "step": 9417, "task_loss": 0.44741037487983704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41594138741493225, "epoch": 7.96, "learning_rate": 1.132713440405748e-05, "loss": 0.5972, "step": 9418, "task_loss": 0.5375856161117554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5363665819168091, "epoch": 7.96, "learning_rate": 1.132243824551517e-05, "loss": 0.6474, "step": 9419, "task_loss": 0.848179042339325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7211003303527832, "epoch": 7.96, "learning_rate": 1.1317742086972857e-05, "loss": 0.5376, "step": 9420, "task_loss": 0.8035428524017334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45320478081703186, "epoch": 7.96, "learning_rate": 1.1313045928430545e-05, "loss": 0.4614, "step": 9421, "task_loss": 0.26824578642845154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6729069948196411, "epoch": 7.96, "learning_rate": 1.1308349769888233e-05, "loss": 0.9207, "step": 9422, "task_loss": 1.2715507745742798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5405430793762207, "epoch": 7.96, "learning_rate": 1.130365361134592e-05, "loss": 0.4873, "step": 9423, "task_loss": 0.8911815285682678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4203943610191345, "epoch": 7.97, "learning_rate": 1.1298957452803607e-05, "loss": 0.5067, "step": 9424, "task_loss": 0.35809504985809326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9075993299484253, "epoch": 7.97, "learning_rate": 1.1294261294261295e-05, "loss": 0.6135, "step": 9425, "task_loss": 0.914237916469574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37385427951812744, "epoch": 7.97, "learning_rate": 1.1289565135718982e-05, "loss": 0.5541, "step": 9426, "task_loss": 0.24951611459255219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5917778015136719, "epoch": 7.97, "learning_rate": 1.128486897717667e-05, "loss": 0.5733, "step": 9427, "task_loss": 0.8164920210838318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9546849131584167, "epoch": 7.97, "learning_rate": 1.1280172818634358e-05, "loss": 0.7081, "step": 9428, "task_loss": 0.8329244256019592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.337451696395874, "epoch": 7.97, "learning_rate": 1.1275476660092046e-05, "loss": 0.5703, "step": 9429, "task_loss": 0.3117765486240387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.755767822265625, "epoch": 7.97, "learning_rate": 1.1270780501549732e-05, "loss": 0.5853, "step": 9430, "task_loss": 1.1506482362747192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5036759972572327, "epoch": 7.97, "learning_rate": 1.126608434300742e-05, "loss": 0.7637, "step": 9431, "task_loss": 0.11645679175853729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8570623993873596, "epoch": 7.97, "learning_rate": 1.1261388184465108e-05, "loss": 0.5943, "step": 9432, "task_loss": 0.27177175879478455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8976597785949707, "epoch": 7.97, "learning_rate": 1.1256692025922794e-05, "loss": 0.7173, "step": 9433, "task_loss": 1.343791127204895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4268742501735687, "epoch": 7.97, "learning_rate": 1.1251995867380482e-05, "loss": 0.482, "step": 9434, "task_loss": 0.7479017376899719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5782151818275452, "epoch": 7.97, "learning_rate": 1.1247299708838172e-05, "loss": 0.8269, "step": 9435, "task_loss": 0.2544817328453064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5297929048538208, "epoch": 7.98, "learning_rate": 1.1242603550295859e-05, "loss": 0.7754, "step": 9436, "task_loss": 0.896498441696167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8297819495201111, "epoch": 7.98, "learning_rate": 1.1237907391753547e-05, "loss": 0.7815, "step": 9437, "task_loss": 1.0301399230957031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30361828207969666, "epoch": 7.98, "learning_rate": 1.1233211233211235e-05, "loss": 0.4151, "step": 9438, "task_loss": 0.6235243678092957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4095439910888672, "epoch": 7.98, "learning_rate": 1.1228515074668921e-05, "loss": 0.5248, "step": 9439, "task_loss": 0.7862451672554016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6722391843795776, "epoch": 7.98, "learning_rate": 1.1223818916126609e-05, "loss": 0.6351, "step": 9440, "task_loss": 0.8686351180076599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41035258769989014, "epoch": 7.98, "learning_rate": 1.1219122757584297e-05, "loss": 0.4962, "step": 9441, "task_loss": 0.618217945098877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4291980564594269, "epoch": 7.98, "learning_rate": 1.1214426599041983e-05, "loss": 0.5716, "step": 9442, "task_loss": 0.9223406314849854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6497535109519958, "epoch": 7.98, "learning_rate": 1.1209730440499671e-05, "loss": 0.6199, "step": 9443, "task_loss": 1.1657088994979858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6508893966674805, "epoch": 7.98, "learning_rate": 1.120503428195736e-05, "loss": 0.5825, "step": 9444, "task_loss": 0.4300452470779419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2782370150089264, "epoch": 7.98, "learning_rate": 1.1200338123415047e-05, "loss": 0.6554, "step": 9445, "task_loss": 0.7699475288391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6011999845504761, "epoch": 7.98, "learning_rate": 1.1195641964872734e-05, "loss": 0.4972, "step": 9446, "task_loss": 1.4702892303466797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9836977124214172, "epoch": 7.99, "learning_rate": 1.1190945806330422e-05, "loss": 0.5714, "step": 9447, "task_loss": 0.66367107629776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4072164297103882, "epoch": 7.99, "learning_rate": 1.118624964778811e-05, "loss": 0.7094, "step": 9448, "task_loss": 0.48776865005493164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.639613151550293, "epoch": 7.99, "learning_rate": 1.1181553489245796e-05, "loss": 0.7603, "step": 9449, "task_loss": 1.1166512966156006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3758698105812073, "epoch": 7.99, "learning_rate": 1.1176857330703486e-05, "loss": 0.5509, "step": 9450, "task_loss": 0.17548464238643646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.380029559135437, "epoch": 7.99, "learning_rate": 1.1172161172161174e-05, "loss": 0.5589, "step": 9451, "task_loss": 1.0709577798843384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5636839866638184, "epoch": 7.99, "learning_rate": 1.116746501361886e-05, "loss": 0.5929, "step": 9452, "task_loss": 0.7805793881416321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43999356031417847, "epoch": 7.99, "learning_rate": 1.1162768855076548e-05, "loss": 0.6019, "step": 9453, "task_loss": 0.8136491775512695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5958019495010376, "epoch": 7.99, "learning_rate": 1.1158072696534236e-05, "loss": 0.6264, "step": 9454, "task_loss": 0.9262558221817017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6469594240188599, "epoch": 7.99, "learning_rate": 1.1153376537991923e-05, "loss": 0.605, "step": 9455, "task_loss": 1.3161710500717163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3921637535095215, "epoch": 7.99, "learning_rate": 1.114868037944961e-05, "loss": 0.6001, "step": 9456, "task_loss": 0.14083170890808105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45663154125213623, "epoch": 7.99, "learning_rate": 1.1143984220907299e-05, "loss": 0.514, "step": 9457, "task_loss": 0.32933858036994934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6048632860183716, "epoch": 7.99, "learning_rate": 1.1139288062364985e-05, "loss": 0.7203, "step": 9458, "task_loss": 0.4054702818393707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8617388606071472, "epoch": 8.0, "learning_rate": 1.1134591903822673e-05, "loss": 0.5978, "step": 9459, "task_loss": 0.5226293206214905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7058023810386658, "epoch": 8.0, "learning_rate": 1.1129895745280361e-05, "loss": 0.7562, "step": 9460, "task_loss": 0.8970572352409363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0723412036895752, "epoch": 8.0, "learning_rate": 1.112519958673805e-05, "loss": 0.5763, "step": 9461, "task_loss": 0.9411733746528625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8886710405349731, "epoch": 8.0, "learning_rate": 1.1120503428195736e-05, "loss": 0.6592, "step": 9462, "task_loss": 1.4397881031036377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8920575380325317, "epoch": 8.0, "learning_rate": 1.1115807269653424e-05, "loss": 0.7466, "step": 9463, "task_loss": 1.524768352508545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6153082847595215, "epoch": 8.0, "learning_rate": 1.1111111111111112e-05, "loss": 0.4977, "step": 9464, "task_loss": 0.5239614248275757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5087453126907349, "epoch": 8.0, "learning_rate": 1.11064149525688e-05, "loss": 1.1913, "step": 9465, "task_loss": 0.32336002588272095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35140693187713623, "epoch": 8.0, "learning_rate": 1.1101718794026488e-05, "loss": 0.4637, "step": 9466, "task_loss": 0.25943663716316223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9807056188583374, "epoch": 8.0, "learning_rate": 1.1097022635484176e-05, "loss": 0.5683, "step": 9467, "task_loss": 0.7647372484207153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7044470310211182, "epoch": 8.0, "learning_rate": 1.1092326476941862e-05, "loss": 0.5468, "step": 9468, "task_loss": 1.741880178451538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3423113226890564, "epoch": 8.0, "learning_rate": 1.108763031839955e-05, "loss": 0.5874, "step": 9469, "task_loss": 0.25047314167022705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37348049879074097, "epoch": 8.01, "learning_rate": 1.1082934159857238e-05, "loss": 0.4196, "step": 9470, "task_loss": 0.7603366374969482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5419629812240601, "epoch": 8.01, "learning_rate": 1.1078238001314924e-05, "loss": 0.5844, "step": 9471, "task_loss": 0.7311228513717651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6387829780578613, "epoch": 8.01, "learning_rate": 1.1073541842772613e-05, "loss": 0.7971, "step": 9472, "task_loss": 0.5276995301246643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5892733335494995, "epoch": 8.01, "learning_rate": 1.10688456842303e-05, "loss": 0.5315, "step": 9473, "task_loss": 1.129476547241211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6989885568618774, "epoch": 8.01, "learning_rate": 1.1064149525687987e-05, "loss": 0.7188, "step": 9474, "task_loss": 0.8698194026947021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37575286626815796, "epoch": 8.01, "learning_rate": 1.1059453367145675e-05, "loss": 0.661, "step": 9475, "task_loss": 1.0630751848220825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6416707038879395, "epoch": 8.01, "learning_rate": 1.1054757208603363e-05, "loss": 0.6069, "step": 9476, "task_loss": 1.0367988348007202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3634396195411682, "epoch": 8.01, "learning_rate": 1.1050061050061051e-05, "loss": 0.5984, "step": 9477, "task_loss": 0.18622416257858276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9245830774307251, "epoch": 8.01, "learning_rate": 1.1045364891518737e-05, "loss": 0.6301, "step": 9478, "task_loss": 0.6098285913467407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.932351291179657, "epoch": 8.01, "learning_rate": 1.1040668732976425e-05, "loss": 0.6667, "step": 9479, "task_loss": 0.5916451811790466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6613271236419678, "epoch": 8.01, "learning_rate": 1.1035972574434113e-05, "loss": 0.6052, "step": 9480, "task_loss": 0.8478948473930359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5787873864173889, "epoch": 8.01, "learning_rate": 1.1031276415891801e-05, "loss": 0.5579, "step": 9481, "task_loss": 0.6867531538009644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5887365341186523, "epoch": 8.02, "learning_rate": 1.102658025734949e-05, "loss": 0.6932, "step": 9482, "task_loss": 1.1603546142578125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7415763735771179, "epoch": 8.02, "learning_rate": 1.1021884098807178e-05, "loss": 0.6246, "step": 9483, "task_loss": 0.28121185302734375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33076557517051697, "epoch": 8.02, "learning_rate": 1.1017187940264864e-05, "loss": 0.4835, "step": 9484, "task_loss": 0.31690526008605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.602699875831604, "epoch": 8.02, "learning_rate": 1.1012491781722552e-05, "loss": 0.5299, "step": 9485, "task_loss": 0.2353859841823578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7531696557998657, "epoch": 8.02, "learning_rate": 1.100779562318024e-05, "loss": 0.5674, "step": 9486, "task_loss": 1.1661425828933716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4256278872489929, "epoch": 8.02, "learning_rate": 1.1003099464637926e-05, "loss": 0.549, "step": 9487, "task_loss": 0.3527028560638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5779841542243958, "epoch": 8.02, "learning_rate": 1.0998403306095614e-05, "loss": 0.6234, "step": 9488, "task_loss": 0.7386232614517212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8755516409873962, "epoch": 8.02, "learning_rate": 1.0993707147553302e-05, "loss": 0.5677, "step": 9489, "task_loss": 1.8637336492538452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6358311772346497, "epoch": 8.02, "learning_rate": 1.0989010989010989e-05, "loss": 0.6963, "step": 9490, "task_loss": 1.027912974357605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8191447257995605, "epoch": 8.02, "learning_rate": 1.0984314830468677e-05, "loss": 0.6776, "step": 9491, "task_loss": 2.3780124187469482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.777106761932373, "epoch": 8.02, "learning_rate": 1.0979618671926365e-05, "loss": 0.6311, "step": 9492, "task_loss": 1.0849230289459229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5286922454833984, "epoch": 8.02, "learning_rate": 1.0974922513384053e-05, "loss": 0.7231, "step": 9493, "task_loss": 0.8071739077568054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38974571228027344, "epoch": 8.03, "learning_rate": 1.0970226354841739e-05, "loss": 0.5216, "step": 9494, "task_loss": 0.09160875529050827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.505257785320282, "epoch": 8.03, "learning_rate": 1.0965530196299427e-05, "loss": 0.5216, "step": 9495, "task_loss": 0.7757536768913269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7242074012756348, "epoch": 8.03, "learning_rate": 1.0960834037757115e-05, "loss": 0.6596, "step": 9496, "task_loss": 0.49860337376594543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6070175766944885, "epoch": 8.03, "learning_rate": 1.0956137879214803e-05, "loss": 0.5537, "step": 9497, "task_loss": 0.6283063888549805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9708364009857178, "epoch": 8.03, "learning_rate": 1.0951441720672491e-05, "loss": 0.6207, "step": 9498, "task_loss": 0.8808799386024475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5925873517990112, "epoch": 8.03, "learning_rate": 1.094674556213018e-05, "loss": 0.6056, "step": 9499, "task_loss": 0.476372629404068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5832890272140503, "epoch": 8.03, "learning_rate": 1.0942049403587866e-05, "loss": 0.7154, "step": 9500, "task_loss": 0.8546236157417297 }, { "epoch": 8.03, "eval_accuracy": 0.9027326732673268, "eval_loss": 0.3747362792491913, "eval_runtime": 223.9953, "eval_samples_per_second": 112.726, "eval_steps_per_second": 0.884, "step": 9500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8868376016616821, "epoch": 8.03, "learning_rate": 1.0937353245045554e-05, "loss": 0.6478, "step": 9501, "task_loss": 0.7333509922027588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46065106987953186, "epoch": 8.03, "learning_rate": 1.0932657086503242e-05, "loss": 0.6011, "step": 9502, "task_loss": 1.1345844268798828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7129539251327515, "epoch": 8.03, "learning_rate": 1.0927960927960928e-05, "loss": 0.4946, "step": 9503, "task_loss": 0.7332802414894104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5098767280578613, "epoch": 8.03, "learning_rate": 1.0923264769418616e-05, "loss": 0.5035, "step": 9504, "task_loss": 0.8573485612869263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7279584407806396, "epoch": 8.03, "learning_rate": 1.0918568610876304e-05, "loss": 0.6466, "step": 9505, "task_loss": 0.8031750321388245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49133336544036865, "epoch": 8.04, "learning_rate": 1.091387245233399e-05, "loss": 0.6402, "step": 9506, "task_loss": 0.6190178394317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5686712861061096, "epoch": 8.04, "learning_rate": 1.0909176293791678e-05, "loss": 0.6056, "step": 9507, "task_loss": 0.8950534462928772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8551054000854492, "epoch": 8.04, "learning_rate": 1.0904480135249366e-05, "loss": 0.6624, "step": 9508, "task_loss": 1.2272008657455444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5302655100822449, "epoch": 8.04, "learning_rate": 1.0899783976707053e-05, "loss": 0.5454, "step": 9509, "task_loss": 0.26878678798675537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0813302993774414, "epoch": 8.04, "learning_rate": 1.0895087818164741e-05, "loss": 0.6453, "step": 9510, "task_loss": 1.7045310735702515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35846248269081116, "epoch": 8.04, "learning_rate": 1.0890391659622429e-05, "loss": 0.4954, "step": 9511, "task_loss": 1.0196921825408936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3360680043697357, "epoch": 8.04, "learning_rate": 1.0885695501080117e-05, "loss": 0.4513, "step": 9512, "task_loss": 0.3787325620651245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6205782890319824, "epoch": 8.04, "learning_rate": 1.0880999342537805e-05, "loss": 0.4763, "step": 9513, "task_loss": 1.4419023990631104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3531428575515747, "epoch": 8.04, "learning_rate": 1.0876303183995493e-05, "loss": 0.6242, "step": 9514, "task_loss": 0.7528399229049683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7560111284255981, "epoch": 8.04, "learning_rate": 1.087160702545318e-05, "loss": 0.6567, "step": 9515, "task_loss": 1.1051634550094604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4271329641342163, "epoch": 8.04, "learning_rate": 1.0866910866910867e-05, "loss": 0.508, "step": 9516, "task_loss": 0.633379340171814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43901684880256653, "epoch": 8.04, "learning_rate": 1.0862214708368555e-05, "loss": 0.565, "step": 9517, "task_loss": 0.490773469209671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6390568017959595, "epoch": 8.05, "learning_rate": 1.0857518549826243e-05, "loss": 0.5088, "step": 9518, "task_loss": 0.9010896682739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5324752926826477, "epoch": 8.05, "learning_rate": 1.085282239128393e-05, "loss": 0.5354, "step": 9519, "task_loss": 0.8103591203689575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35582849383354187, "epoch": 8.05, "learning_rate": 1.0848126232741618e-05, "loss": 0.532, "step": 9520, "task_loss": 0.04557404667139053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7376825213432312, "epoch": 8.05, "learning_rate": 1.0843430074199306e-05, "loss": 0.6497, "step": 9521, "task_loss": 0.30085164308547974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7397708892822266, "epoch": 8.05, "learning_rate": 1.0838733915656992e-05, "loss": 0.7921, "step": 9522, "task_loss": 0.5642065405845642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47309428453445435, "epoch": 8.05, "learning_rate": 1.083403775711468e-05, "loss": 0.6152, "step": 9523, "task_loss": 0.34019735455513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5247946977615356, "epoch": 8.05, "learning_rate": 1.0829341598572368e-05, "loss": 0.7015, "step": 9524, "task_loss": 0.4546815752983093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3682272434234619, "epoch": 8.05, "learning_rate": 1.0824645440030055e-05, "loss": 0.4998, "step": 9525, "task_loss": 0.48549848794937134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4960305094718933, "epoch": 8.05, "learning_rate": 1.0819949281487743e-05, "loss": 0.605, "step": 9526, "task_loss": 0.13171499967575073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8667436838150024, "epoch": 8.05, "learning_rate": 1.0815253122945432e-05, "loss": 0.5947, "step": 9527, "task_loss": 1.1537548303604126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4228610396385193, "epoch": 8.05, "learning_rate": 1.0810556964403119e-05, "loss": 0.4245, "step": 9528, "task_loss": 0.6162853240966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4537409842014313, "epoch": 8.05, "learning_rate": 1.0805860805860807e-05, "loss": 0.625, "step": 9529, "task_loss": 1.1985092163085938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3630886673927307, "epoch": 8.06, "learning_rate": 1.0801164647318495e-05, "loss": 0.5704, "step": 9530, "task_loss": 0.4044733941555023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9868044853210449, "epoch": 8.06, "learning_rate": 1.0796468488776181e-05, "loss": 0.7739, "step": 9531, "task_loss": 0.338076651096344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.08309006690979, "epoch": 8.06, "learning_rate": 1.0791772330233869e-05, "loss": 0.5393, "step": 9532, "task_loss": 0.46671515703201294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6502399444580078, "epoch": 8.06, "learning_rate": 1.0787076171691557e-05, "loss": 0.5071, "step": 9533, "task_loss": 0.7282825708389282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4505136013031006, "epoch": 8.06, "learning_rate": 1.0782380013149245e-05, "loss": 0.5661, "step": 9534, "task_loss": 0.8872408866882324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36288881301879883, "epoch": 8.06, "learning_rate": 1.0777683854606932e-05, "loss": 0.475, "step": 9535, "task_loss": 0.20066292583942413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39155423641204834, "epoch": 8.06, "learning_rate": 1.077298769606462e-05, "loss": 0.3976, "step": 9536, "task_loss": 0.1572062373161316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38251805305480957, "epoch": 8.06, "learning_rate": 1.0768291537522308e-05, "loss": 0.3617, "step": 9537, "task_loss": 0.36664506793022156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.636491060256958, "epoch": 8.06, "learning_rate": 1.0763595378979994e-05, "loss": 0.4941, "step": 9538, "task_loss": 1.0368419885635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7894288301467896, "epoch": 8.06, "learning_rate": 1.0758899220437682e-05, "loss": 0.5379, "step": 9539, "task_loss": 1.3558489084243774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48311012983322144, "epoch": 8.06, "learning_rate": 1.075420306189537e-05, "loss": 0.63, "step": 9540, "task_loss": 0.1996113806962967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5801429152488708, "epoch": 8.07, "learning_rate": 1.0749506903353056e-05, "loss": 0.6627, "step": 9541, "task_loss": 0.8322306871414185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4267495274543762, "epoch": 8.07, "learning_rate": 1.0744810744810746e-05, "loss": 0.7159, "step": 9542, "task_loss": 1.0319888591766357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5092719793319702, "epoch": 8.07, "learning_rate": 1.0740114586268434e-05, "loss": 0.7003, "step": 9543, "task_loss": 0.29587867856025696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6694880127906799, "epoch": 8.07, "learning_rate": 1.073541842772612e-05, "loss": 0.5889, "step": 9544, "task_loss": 0.4104151725769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5187952518463135, "epoch": 8.07, "learning_rate": 1.0730722269183808e-05, "loss": 0.5635, "step": 9545, "task_loss": 0.42076292634010315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6307637691497803, "epoch": 8.07, "learning_rate": 1.0726026110641497e-05, "loss": 0.4834, "step": 9546, "task_loss": 0.841456949710846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6678224802017212, "epoch": 8.07, "learning_rate": 1.0721329952099183e-05, "loss": 0.823, "step": 9547, "task_loss": 0.6962177753448486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46575847268104553, "epoch": 8.07, "learning_rate": 1.0716633793556871e-05, "loss": 0.6128, "step": 9548, "task_loss": 0.6165735721588135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.588721513748169, "epoch": 8.07, "learning_rate": 1.0711937635014559e-05, "loss": 0.6132, "step": 9549, "task_loss": 1.049211025238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3837040066719055, "epoch": 8.07, "learning_rate": 1.0707241476472247e-05, "loss": 0.4943, "step": 9550, "task_loss": 0.05801668018102646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8307170271873474, "epoch": 8.07, "learning_rate": 1.0702545317929933e-05, "loss": 0.5732, "step": 9551, "task_loss": 0.7106835842132568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29201436042785645, "epoch": 8.07, "learning_rate": 1.0697849159387621e-05, "loss": 0.3547, "step": 9552, "task_loss": 0.6507024765014648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.640367329120636, "epoch": 8.08, "learning_rate": 1.069315300084531e-05, "loss": 0.5405, "step": 9553, "task_loss": 0.3334060311317444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6468420624732971, "epoch": 8.08, "learning_rate": 1.0688456842302996e-05, "loss": 0.5793, "step": 9554, "task_loss": 0.8326210975646973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8298318386077881, "epoch": 8.08, "learning_rate": 1.0683760683760684e-05, "loss": 0.6849, "step": 9555, "task_loss": 0.8625490665435791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6658575534820557, "epoch": 8.08, "learning_rate": 1.0679064525218372e-05, "loss": 0.6217, "step": 9556, "task_loss": 0.45563822984695435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5270939469337463, "epoch": 8.08, "learning_rate": 1.0674368366676058e-05, "loss": 0.578, "step": 9557, "task_loss": 0.41823065280914307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3209911286830902, "epoch": 8.08, "learning_rate": 1.0669672208133748e-05, "loss": 0.4601, "step": 9558, "task_loss": 0.3538365066051483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6265459656715393, "epoch": 8.08, "learning_rate": 1.0664976049591436e-05, "loss": 0.67, "step": 9559, "task_loss": 0.6380525827407837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.797144889831543, "epoch": 8.08, "learning_rate": 1.0660279891049122e-05, "loss": 0.6761, "step": 9560, "task_loss": 1.038027048110962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7234443426132202, "epoch": 8.08, "learning_rate": 1.065558373250681e-05, "loss": 0.5555, "step": 9561, "task_loss": 1.0302376747131348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46195951104164124, "epoch": 8.08, "learning_rate": 1.0650887573964498e-05, "loss": 0.5881, "step": 9562, "task_loss": 0.4535996615886688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3344178795814514, "epoch": 8.08, "learning_rate": 1.0646191415422185e-05, "loss": 0.5927, "step": 9563, "task_loss": 1.1582939624786377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5792403817176819, "epoch": 8.08, "learning_rate": 1.0641495256879873e-05, "loss": 0.6126, "step": 9564, "task_loss": 0.8111135959625244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49029654264450073, "epoch": 8.09, "learning_rate": 1.063679909833756e-05, "loss": 0.7324, "step": 9565, "task_loss": 0.7151516079902649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5801153779029846, "epoch": 8.09, "learning_rate": 1.0632102939795249e-05, "loss": 0.4641, "step": 9566, "task_loss": 0.6590712666511536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.714347243309021, "epoch": 8.09, "learning_rate": 1.0627406781252935e-05, "loss": 0.6798, "step": 9567, "task_loss": 1.2181792259216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7921946048736572, "epoch": 8.09, "learning_rate": 1.0622710622710623e-05, "loss": 0.5165, "step": 9568, "task_loss": 1.1708883047103882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8510252237319946, "epoch": 8.09, "learning_rate": 1.0618014464168311e-05, "loss": 0.7812, "step": 9569, "task_loss": 1.8644523620605469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5526095032691956, "epoch": 8.09, "learning_rate": 1.0613318305625997e-05, "loss": 0.7319, "step": 9570, "task_loss": 1.4761714935302734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4818944036960602, "epoch": 8.09, "learning_rate": 1.0608622147083686e-05, "loss": 0.528, "step": 9571, "task_loss": 1.137098789215088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5996138453483582, "epoch": 8.09, "learning_rate": 1.0603925988541374e-05, "loss": 0.5531, "step": 9572, "task_loss": 0.6259656548500061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5769083499908447, "epoch": 8.09, "learning_rate": 1.0599229829999062e-05, "loss": 0.3475, "step": 9573, "task_loss": 0.9477295875549316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4277808666229248, "epoch": 8.09, "learning_rate": 1.059453367145675e-05, "loss": 0.6254, "step": 9574, "task_loss": 0.2860794961452484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2009975910186768, "epoch": 8.09, "learning_rate": 1.0589837512914438e-05, "loss": 0.8368, "step": 9575, "task_loss": 0.9525521993637085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7183001041412354, "epoch": 8.09, "learning_rate": 1.0585141354372124e-05, "loss": 0.6048, "step": 9576, "task_loss": 0.5646456480026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6332523226737976, "epoch": 8.1, "learning_rate": 1.0580445195829812e-05, "loss": 0.532, "step": 9577, "task_loss": 0.7462270259857178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5894363522529602, "epoch": 8.1, "learning_rate": 1.05757490372875e-05, "loss": 0.5, "step": 9578, "task_loss": 0.9264602661132812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7154154181480408, "epoch": 8.1, "learning_rate": 1.0571052878745186e-05, "loss": 0.7415, "step": 9579, "task_loss": 0.8783657550811768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6904792189598083, "epoch": 8.1, "learning_rate": 1.0566356720202874e-05, "loss": 0.5578, "step": 9580, "task_loss": 0.3036198616027832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7932901382446289, "epoch": 8.1, "learning_rate": 1.0561660561660562e-05, "loss": 0.6022, "step": 9581, "task_loss": 1.378924012184143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33150574564933777, "epoch": 8.1, "learning_rate": 1.0556964403118249e-05, "loss": 0.4565, "step": 9582, "task_loss": 0.6437826156616211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4403172433376312, "epoch": 8.1, "learning_rate": 1.0552268244575937e-05, "loss": 0.5493, "step": 9583, "task_loss": 0.8350158333778381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.689140260219574, "epoch": 8.1, "learning_rate": 1.0547572086033625e-05, "loss": 0.643, "step": 9584, "task_loss": 0.2601122260093689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2451862394809723, "epoch": 8.1, "learning_rate": 1.0542875927491313e-05, "loss": 0.5506, "step": 9585, "task_loss": 0.2814542055130005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3596716821193695, "epoch": 8.1, "learning_rate": 1.0538179768949e-05, "loss": 0.6561, "step": 9586, "task_loss": 1.1112534999847412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6719481945037842, "epoch": 8.1, "learning_rate": 1.0533483610406687e-05, "loss": 0.5892, "step": 9587, "task_loss": 1.1988041400909424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4640606641769409, "epoch": 8.1, "learning_rate": 1.0528787451864375e-05, "loss": 0.6804, "step": 9588, "task_loss": 1.0938695669174194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8465560674667358, "epoch": 8.11, "learning_rate": 1.0524091293322063e-05, "loss": 0.6917, "step": 9589, "task_loss": 0.3841468095779419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7315590381622314, "epoch": 8.11, "learning_rate": 1.0519395134779751e-05, "loss": 0.5623, "step": 9590, "task_loss": 0.727066695690155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4801850914955139, "epoch": 8.11, "learning_rate": 1.051469897623744e-05, "loss": 0.6009, "step": 9591, "task_loss": 0.6770482063293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5947733521461487, "epoch": 8.11, "learning_rate": 1.0510002817695126e-05, "loss": 0.5948, "step": 9592, "task_loss": 1.4456790685653687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5531534552574158, "epoch": 8.11, "learning_rate": 1.0505306659152814e-05, "loss": 0.4907, "step": 9593, "task_loss": 0.7156304717063904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3617919087409973, "epoch": 8.11, "learning_rate": 1.0500610500610502e-05, "loss": 0.5475, "step": 9594, "task_loss": 0.32918602228164673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5400863885879517, "epoch": 8.11, "learning_rate": 1.0495914342068188e-05, "loss": 0.6238, "step": 9595, "task_loss": 1.0703479051589966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6788735389709473, "epoch": 8.11, "learning_rate": 1.0491218183525876e-05, "loss": 0.5257, "step": 9596, "task_loss": 0.27742376923561096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43469178676605225, "epoch": 8.11, "learning_rate": 1.0486522024983564e-05, "loss": 0.4515, "step": 9597, "task_loss": 0.4679819941520691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6996597051620483, "epoch": 8.11, "learning_rate": 1.048182586644125e-05, "loss": 0.5831, "step": 9598, "task_loss": 0.3744509518146515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7028446197509766, "epoch": 8.11, "learning_rate": 1.0477129707898939e-05, "loss": 0.5999, "step": 9599, "task_loss": 0.18717308342456818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4019017815589905, "epoch": 8.11, "learning_rate": 1.0472433549356627e-05, "loss": 0.5164, "step": 9600, "task_loss": 0.7360302805900574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4179641604423523, "epoch": 8.12, "learning_rate": 1.0467737390814315e-05, "loss": 0.6076, "step": 9601, "task_loss": 1.0537219047546387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38949131965637207, "epoch": 8.12, "learning_rate": 1.0463041232272001e-05, "loss": 0.4855, "step": 9602, "task_loss": 0.7557820677757263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7016940116882324, "epoch": 8.12, "learning_rate": 1.0458345073729689e-05, "loss": 0.8547, "step": 9603, "task_loss": 1.4766907691955566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.616112470626831, "epoch": 8.12, "learning_rate": 1.0453648915187377e-05, "loss": 0.4874, "step": 9604, "task_loss": 0.3295974135398865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6480559706687927, "epoch": 8.12, "learning_rate": 1.0448952756645065e-05, "loss": 0.5824, "step": 9605, "task_loss": 0.9183165431022644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5669050216674805, "epoch": 8.12, "learning_rate": 1.0444256598102753e-05, "loss": 0.5827, "step": 9606, "task_loss": 0.3981649875640869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.475372850894928, "epoch": 8.12, "learning_rate": 1.0439560439560441e-05, "loss": 0.5199, "step": 9607, "task_loss": 0.6136968731880188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6683482527732849, "epoch": 8.12, "learning_rate": 1.0434864281018128e-05, "loss": 0.5858, "step": 9608, "task_loss": 0.6257038712501526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4379020929336548, "epoch": 8.12, "learning_rate": 1.0430168122475816e-05, "loss": 0.5896, "step": 9609, "task_loss": 0.21730482578277588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7050869464874268, "epoch": 8.12, "learning_rate": 1.0425471963933504e-05, "loss": 0.4771, "step": 9610, "task_loss": 1.622050166130066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6460671424865723, "epoch": 8.12, "learning_rate": 1.042077580539119e-05, "loss": 0.5999, "step": 9611, "task_loss": 0.6066354513168335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3074805736541748, "epoch": 8.13, "learning_rate": 1.0416079646848878e-05, "loss": 0.4468, "step": 9612, "task_loss": 0.2036624699831009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4348602294921875, "epoch": 8.13, "learning_rate": 1.0411383488306566e-05, "loss": 0.5808, "step": 9613, "task_loss": 0.42050686478614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4704669713973999, "epoch": 8.13, "learning_rate": 1.0406687329764252e-05, "loss": 0.4672, "step": 9614, "task_loss": 0.11978614330291748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7910167574882507, "epoch": 8.13, "learning_rate": 1.040199117122194e-05, "loss": 0.6383, "step": 9615, "task_loss": 0.571071207523346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37179118394851685, "epoch": 8.13, "learning_rate": 1.0397295012679628e-05, "loss": 0.4058, "step": 9616, "task_loss": 1.1320393085479736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8174391388893127, "epoch": 8.13, "learning_rate": 1.0392598854137316e-05, "loss": 0.7418, "step": 9617, "task_loss": 0.4518655836582184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36635422706604004, "epoch": 8.13, "learning_rate": 1.0387902695595003e-05, "loss": 0.4093, "step": 9618, "task_loss": 0.19373680651187897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7119874954223633, "epoch": 8.13, "learning_rate": 1.0383206537052693e-05, "loss": 0.6892, "step": 9619, "task_loss": 1.0759050846099854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4808367192745209, "epoch": 8.13, "learning_rate": 1.0378510378510379e-05, "loss": 0.4786, "step": 9620, "task_loss": 0.8872754573822021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4939457178115845, "epoch": 8.13, "learning_rate": 1.0373814219968067e-05, "loss": 0.5765, "step": 9621, "task_loss": 1.219308614730835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7012231349945068, "epoch": 8.13, "learning_rate": 1.0369118061425755e-05, "loss": 0.6074, "step": 9622, "task_loss": 1.0000358819961548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4341309666633606, "epoch": 8.13, "learning_rate": 1.0364421902883443e-05, "loss": 0.4657, "step": 9623, "task_loss": 0.29882434010505676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7481033205986023, "epoch": 8.14, "learning_rate": 1.035972574434113e-05, "loss": 0.5104, "step": 9624, "task_loss": 1.2134045362472534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5169277191162109, "epoch": 8.14, "learning_rate": 1.0355029585798817e-05, "loss": 0.6156, "step": 9625, "task_loss": 0.16836602985858917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.314828097820282, "epoch": 8.14, "learning_rate": 1.0350333427256505e-05, "loss": 0.4868, "step": 9626, "task_loss": 0.06599763035774231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32037845253944397, "epoch": 8.14, "learning_rate": 1.0345637268714192e-05, "loss": 0.5649, "step": 9627, "task_loss": 0.6460191607475281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21064987778663635, "epoch": 8.14, "learning_rate": 1.034094111017188e-05, "loss": 0.5514, "step": 9628, "task_loss": 0.038453198969364166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6494431495666504, "epoch": 8.14, "learning_rate": 1.0336244951629568e-05, "loss": 0.6507, "step": 9629, "task_loss": 0.983591616153717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4018845558166504, "epoch": 8.14, "learning_rate": 1.0331548793087254e-05, "loss": 0.5063, "step": 9630, "task_loss": 0.605381190776825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3669758141040802, "epoch": 8.14, "learning_rate": 1.0326852634544942e-05, "loss": 0.4566, "step": 9631, "task_loss": 0.673854410648346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6253175139427185, "epoch": 8.14, "learning_rate": 1.032215647600263e-05, "loss": 0.6461, "step": 9632, "task_loss": 0.46778032183647156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20804664492607117, "epoch": 8.14, "learning_rate": 1.0317460317460318e-05, "loss": 0.5624, "step": 9633, "task_loss": 0.16465888917446136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5180400609970093, "epoch": 8.14, "learning_rate": 1.0312764158918005e-05, "loss": 0.6002, "step": 9634, "task_loss": 0.9319434762001038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48654842376708984, "epoch": 8.14, "learning_rate": 1.0308068000375694e-05, "loss": 0.5063, "step": 9635, "task_loss": 0.8006775379180908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8423760533332825, "epoch": 8.15, "learning_rate": 1.030337184183338e-05, "loss": 0.7417, "step": 9636, "task_loss": 0.48480769991874695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8238364458084106, "epoch": 8.15, "learning_rate": 1.0298675683291069e-05, "loss": 0.6701, "step": 9637, "task_loss": 1.4269976615905762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7785571813583374, "epoch": 8.15, "learning_rate": 1.0293979524748757e-05, "loss": 0.5791, "step": 9638, "task_loss": 0.7919607162475586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3831483721733093, "epoch": 8.15, "learning_rate": 1.0289283366206445e-05, "loss": 0.5347, "step": 9639, "task_loss": 1.4496821165084839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5480550527572632, "epoch": 8.15, "learning_rate": 1.0284587207664131e-05, "loss": 0.6431, "step": 9640, "task_loss": 0.4184710681438446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4846403896808624, "epoch": 8.15, "learning_rate": 1.0279891049121819e-05, "loss": 0.6998, "step": 9641, "task_loss": 0.7643507719039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5220224261283875, "epoch": 8.15, "learning_rate": 1.0275194890579507e-05, "loss": 0.5025, "step": 9642, "task_loss": 0.5071871876716614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3402816653251648, "epoch": 8.15, "learning_rate": 1.0270498732037193e-05, "loss": 0.527, "step": 9643, "task_loss": 1.545352816581726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6336180567741394, "epoch": 8.15, "learning_rate": 1.0265802573494881e-05, "loss": 0.4575, "step": 9644, "task_loss": 0.8746249675750732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41865405440330505, "epoch": 8.15, "learning_rate": 1.026110641495257e-05, "loss": 0.5358, "step": 9645, "task_loss": 0.5017960667610168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42262837290763855, "epoch": 8.15, "learning_rate": 1.0256410256410256e-05, "loss": 0.5266, "step": 9646, "task_loss": 0.36073970794677734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4259600043296814, "epoch": 8.15, "learning_rate": 1.0251714097867944e-05, "loss": 0.7383, "step": 9647, "task_loss": 0.53009432554245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5050508379936218, "epoch": 8.16, "learning_rate": 1.0247017939325632e-05, "loss": 0.5452, "step": 9648, "task_loss": 1.104569911956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5976473093032837, "epoch": 8.16, "learning_rate": 1.0242321780783318e-05, "loss": 0.5728, "step": 9649, "task_loss": 0.07663723826408386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40748050808906555, "epoch": 8.16, "learning_rate": 1.0237625622241008e-05, "loss": 0.4405, "step": 9650, "task_loss": 0.17674520611763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33074161410331726, "epoch": 8.16, "learning_rate": 1.0232929463698696e-05, "loss": 0.5572, "step": 9651, "task_loss": 0.9484474062919617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8121024370193481, "epoch": 8.16, "learning_rate": 1.0228233305156382e-05, "loss": 0.5713, "step": 9652, "task_loss": 0.5390631556510925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0641276836395264, "epoch": 8.16, "learning_rate": 1.022353714661407e-05, "loss": 0.6523, "step": 9653, "task_loss": 1.008182168006897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7005025148391724, "epoch": 8.16, "learning_rate": 1.0218840988071758e-05, "loss": 0.7387, "step": 9654, "task_loss": 1.3356821537017822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.508358359336853, "epoch": 8.16, "learning_rate": 1.0214144829529446e-05, "loss": 0.5885, "step": 9655, "task_loss": 0.5363051891326904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.069261908531189, "epoch": 8.16, "learning_rate": 1.0209448670987133e-05, "loss": 0.7065, "step": 9656, "task_loss": 1.0365664958953857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3415699601173401, "epoch": 8.16, "learning_rate": 1.020475251244482e-05, "loss": 0.5507, "step": 9657, "task_loss": 0.6801299452781677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41512978076934814, "epoch": 8.16, "learning_rate": 1.0200056353902509e-05, "loss": 0.6936, "step": 9658, "task_loss": 0.11783867329359055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6024330854415894, "epoch": 8.16, "learning_rate": 1.0195360195360195e-05, "loss": 0.6147, "step": 9659, "task_loss": 0.7288731932640076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4463530480861664, "epoch": 8.17, "learning_rate": 1.0190664036817883e-05, "loss": 0.702, "step": 9660, "task_loss": 1.437488079071045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5350985527038574, "epoch": 8.17, "learning_rate": 1.0185967878275571e-05, "loss": 0.7029, "step": 9661, "task_loss": 1.7725685834884644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.558629035949707, "epoch": 8.17, "learning_rate": 1.0181271719733258e-05, "loss": 0.4641, "step": 9662, "task_loss": 0.8435415625572205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2901422381401062, "epoch": 8.17, "learning_rate": 1.0176575561190946e-05, "loss": 0.4537, "step": 9663, "task_loss": 0.1474120318889618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6094174385070801, "epoch": 8.17, "learning_rate": 1.0171879402648634e-05, "loss": 0.6409, "step": 9664, "task_loss": 1.3493342399597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3722876310348511, "epoch": 8.17, "learning_rate": 1.016718324410632e-05, "loss": 0.4886, "step": 9665, "task_loss": 1.034665822982788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46715110540390015, "epoch": 8.17, "learning_rate": 1.016248708556401e-05, "loss": 0.4967, "step": 9666, "task_loss": 0.9524937272071838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45059606432914734, "epoch": 8.17, "learning_rate": 1.0157790927021698e-05, "loss": 0.4698, "step": 9667, "task_loss": 0.8332475423812866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41986221075057983, "epoch": 8.17, "learning_rate": 1.0153094768479384e-05, "loss": 0.5535, "step": 9668, "task_loss": 0.445401668548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5132462978363037, "epoch": 8.17, "learning_rate": 1.0148398609937072e-05, "loss": 0.6885, "step": 9669, "task_loss": 0.3993627429008484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32254135608673096, "epoch": 8.17, "learning_rate": 1.014370245139476e-05, "loss": 0.4815, "step": 9670, "task_loss": 0.7303148508071899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5991589426994324, "epoch": 8.17, "learning_rate": 1.0139006292852447e-05, "loss": 0.7541, "step": 9671, "task_loss": 0.7975488305091858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9351520538330078, "epoch": 8.18, "learning_rate": 1.0134310134310135e-05, "loss": 0.6401, "step": 9672, "task_loss": 1.0616929531097412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3274337649345398, "epoch": 8.18, "learning_rate": 1.0129613975767823e-05, "loss": 0.445, "step": 9673, "task_loss": 0.5255476236343384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8525959849357605, "epoch": 8.18, "learning_rate": 1.012491781722551e-05, "loss": 0.582, "step": 9674, "task_loss": 0.4455711543560028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2650267779827118, "epoch": 8.18, "learning_rate": 1.0120221658683197e-05, "loss": 0.378, "step": 9675, "task_loss": 1.0275119543075562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7611756920814514, "epoch": 8.18, "learning_rate": 1.0115525500140885e-05, "loss": 0.7885, "step": 9676, "task_loss": 1.0114465951919556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5324666500091553, "epoch": 8.18, "learning_rate": 1.0110829341598573e-05, "loss": 0.5342, "step": 9677, "task_loss": 0.40813690423965454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5322350263595581, "epoch": 8.18, "learning_rate": 1.010613318305626e-05, "loss": 0.5966, "step": 9678, "task_loss": 1.0487946271896362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.281570702791214, "epoch": 8.18, "learning_rate": 1.0101437024513947e-05, "loss": 0.4547, "step": 9679, "task_loss": 0.2526683807373047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7620961666107178, "epoch": 8.18, "learning_rate": 1.0096740865971635e-05, "loss": 0.562, "step": 9680, "task_loss": 0.2998420298099518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7353916168212891, "epoch": 8.18, "learning_rate": 1.0092044707429323e-05, "loss": 0.6114, "step": 9681, "task_loss": 0.5620479583740234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36116263270378113, "epoch": 8.18, "learning_rate": 1.0087348548887012e-05, "loss": 0.6199, "step": 9682, "task_loss": 0.284557968378067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4559437036514282, "epoch": 8.19, "learning_rate": 1.00826523903447e-05, "loss": 0.5057, "step": 9683, "task_loss": 0.36344704031944275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5914353132247925, "epoch": 8.19, "learning_rate": 1.0077956231802386e-05, "loss": 0.6807, "step": 9684, "task_loss": 0.5510467290878296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6076349020004272, "epoch": 8.19, "learning_rate": 1.0073260073260074e-05, "loss": 0.4279, "step": 9685, "task_loss": 1.1359875202178955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44041526317596436, "epoch": 8.19, "learning_rate": 1.0068563914717762e-05, "loss": 0.4453, "step": 9686, "task_loss": 0.8719883561134338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.766585111618042, "epoch": 8.19, "learning_rate": 1.0063867756175448e-05, "loss": 0.5963, "step": 9687, "task_loss": 1.323373794555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3077762722969055, "epoch": 8.19, "learning_rate": 1.0059171597633136e-05, "loss": 0.581, "step": 9688, "task_loss": 0.820094645023346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4953611493110657, "epoch": 8.19, "learning_rate": 1.0054475439090824e-05, "loss": 0.6732, "step": 9689, "task_loss": 0.28626519441604614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3266690671443939, "epoch": 8.19, "learning_rate": 1.0049779280548512e-05, "loss": 0.5416, "step": 9690, "task_loss": 0.3961522877216339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.748691976070404, "epoch": 8.19, "learning_rate": 1.0045083122006199e-05, "loss": 0.6366, "step": 9691, "task_loss": 0.9961747527122498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.767720639705658, "epoch": 8.19, "learning_rate": 1.0040386963463887e-05, "loss": 0.8054, "step": 9692, "task_loss": 0.8883514404296875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3585298955440521, "epoch": 8.19, "learning_rate": 1.0035690804921575e-05, "loss": 0.6217, "step": 9693, "task_loss": 0.30924996733665466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7342717051506042, "epoch": 8.19, "learning_rate": 1.0030994646379261e-05, "loss": 0.5851, "step": 9694, "task_loss": 0.38798096776008606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7038295269012451, "epoch": 8.2, "learning_rate": 1.002629848783695e-05, "loss": 0.5016, "step": 9695, "task_loss": 1.965148687362671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8120602369308472, "epoch": 8.2, "learning_rate": 1.0021602329294639e-05, "loss": 0.6741, "step": 9696, "task_loss": 1.078934907913208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2977752983570099, "epoch": 8.2, "learning_rate": 1.0016906170752325e-05, "loss": 0.6265, "step": 9697, "task_loss": 0.20462608337402344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4005044400691986, "epoch": 8.2, "learning_rate": 1.0012210012210013e-05, "loss": 0.5786, "step": 9698, "task_loss": 0.6635235548019409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3586342930793762, "epoch": 8.2, "learning_rate": 1.0007513853667701e-05, "loss": 0.4634, "step": 9699, "task_loss": 0.699041485786438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6263784170150757, "epoch": 8.2, "learning_rate": 1.0002817695125388e-05, "loss": 0.5057, "step": 9700, "task_loss": 0.5286819934844971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5996286869049072, "epoch": 8.2, "learning_rate": 9.998121536583076e-06, "loss": 0.6412, "step": 9701, "task_loss": 0.89089035987854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5347873568534851, "epoch": 8.2, "learning_rate": 9.993425378040764e-06, "loss": 0.5109, "step": 9702, "task_loss": 0.7501329779624939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6090496182441711, "epoch": 8.2, "learning_rate": 9.98872921949845e-06, "loss": 0.5883, "step": 9703, "task_loss": 0.9367045760154724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7074451446533203, "epoch": 8.2, "learning_rate": 9.984033060956138e-06, "loss": 0.6328, "step": 9704, "task_loss": 1.0266928672790527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5220617651939392, "epoch": 8.2, "learning_rate": 9.979336902413826e-06, "loss": 0.6705, "step": 9705, "task_loss": 0.928917646408081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29194092750549316, "epoch": 8.2, "learning_rate": 9.974640743871514e-06, "loss": 0.749, "step": 9706, "task_loss": 1.1935652494430542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3610946834087372, "epoch": 8.21, "learning_rate": 9.9699445853292e-06, "loss": 0.4067, "step": 9707, "task_loss": 0.4835236370563507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31630733609199524, "epoch": 8.21, "learning_rate": 9.965248426786889e-06, "loss": 0.637, "step": 9708, "task_loss": 0.19658233225345612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7364922761917114, "epoch": 8.21, "learning_rate": 9.960552268244577e-06, "loss": 0.6355, "step": 9709, "task_loss": 0.6864526867866516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2600601613521576, "epoch": 8.21, "learning_rate": 9.955856109702263e-06, "loss": 0.6146, "step": 9710, "task_loss": 0.2657170593738556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4554949998855591, "epoch": 8.21, "learning_rate": 9.951159951159951e-06, "loss": 0.5356, "step": 9711, "task_loss": 0.47120094299316406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34213680028915405, "epoch": 8.21, "learning_rate": 9.94646379261764e-06, "loss": 0.4407, "step": 9712, "task_loss": 0.3299012780189514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6756607294082642, "epoch": 8.21, "learning_rate": 9.941767634075327e-06, "loss": 0.6295, "step": 9713, "task_loss": 0.5283715128898621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47305184602737427, "epoch": 8.21, "learning_rate": 9.937071475533015e-06, "loss": 0.4962, "step": 9714, "task_loss": 0.6169249415397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5045104622840881, "epoch": 8.21, "learning_rate": 9.932375316990703e-06, "loss": 0.5845, "step": 9715, "task_loss": 0.5523807406425476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26364409923553467, "epoch": 8.21, "learning_rate": 9.92767915844839e-06, "loss": 0.4559, "step": 9716, "task_loss": 0.2695615589618683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5800600051879883, "epoch": 8.21, "learning_rate": 9.922982999906077e-06, "loss": 0.6688, "step": 9717, "task_loss": 0.8304736614227295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5980679988861084, "epoch": 8.21, "learning_rate": 9.918286841363765e-06, "loss": 0.6277, "step": 9718, "task_loss": 0.9424735307693481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.138640284538269, "epoch": 8.22, "learning_rate": 9.913590682821452e-06, "loss": 0.6425, "step": 9719, "task_loss": 1.504014492034912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7370280623435974, "epoch": 8.22, "learning_rate": 9.90889452427914e-06, "loss": 0.5342, "step": 9720, "task_loss": 0.48263704776763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4169591963291168, "epoch": 8.22, "learning_rate": 9.904198365736828e-06, "loss": 0.5583, "step": 9721, "task_loss": 1.383323073387146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5020022392272949, "epoch": 8.22, "learning_rate": 9.899502207194516e-06, "loss": 0.5155, "step": 9722, "task_loss": 0.8042597770690918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9070491194725037, "epoch": 8.22, "learning_rate": 9.894806048652202e-06, "loss": 0.6629, "step": 9723, "task_loss": 1.3156700134277344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44783931970596313, "epoch": 8.22, "learning_rate": 9.89010989010989e-06, "loss": 0.4947, "step": 9724, "task_loss": 0.3302594721317291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6887768507003784, "epoch": 8.22, "learning_rate": 9.885413731567578e-06, "loss": 0.505, "step": 9725, "task_loss": 0.24923601746559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5249003767967224, "epoch": 8.22, "learning_rate": 9.880717573025265e-06, "loss": 0.4638, "step": 9726, "task_loss": 0.17837874591350555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6553055047988892, "epoch": 8.22, "learning_rate": 9.876021414482954e-06, "loss": 0.5512, "step": 9727, "task_loss": 0.9879478812217712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8225804567337036, "epoch": 8.22, "learning_rate": 9.871325255940642e-06, "loss": 0.657, "step": 9728, "task_loss": 1.526862621307373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5889589190483093, "epoch": 8.22, "learning_rate": 9.866629097398329e-06, "loss": 0.6618, "step": 9729, "task_loss": 0.624082088470459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.23591750860214233, "epoch": 8.22, "learning_rate": 9.861932938856017e-06, "loss": 0.3786, "step": 9730, "task_loss": 0.5457968711853027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4147767722606659, "epoch": 8.23, "learning_rate": 9.857236780313705e-06, "loss": 0.5154, "step": 9731, "task_loss": 0.5059517025947571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4142061173915863, "epoch": 8.23, "learning_rate": 9.852540621771391e-06, "loss": 0.4977, "step": 9732, "task_loss": 1.0448243618011475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4095041751861572, "epoch": 8.23, "learning_rate": 9.84784446322908e-06, "loss": 0.5893, "step": 9733, "task_loss": 0.5520946383476257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5500776171684265, "epoch": 8.23, "learning_rate": 9.843148304686767e-06, "loss": 0.5323, "step": 9734, "task_loss": 0.6083729863166809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44986051321029663, "epoch": 8.23, "learning_rate": 9.838452146144454e-06, "loss": 0.5417, "step": 9735, "task_loss": 0.35752367973327637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6286810040473938, "epoch": 8.23, "learning_rate": 9.833755987602142e-06, "loss": 0.7736, "step": 9736, "task_loss": 1.1054311990737915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6806812882423401, "epoch": 8.23, "learning_rate": 9.82905982905983e-06, "loss": 0.7261, "step": 9737, "task_loss": 0.8727285861968994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4556974172592163, "epoch": 8.23, "learning_rate": 9.824363670517516e-06, "loss": 0.4943, "step": 9738, "task_loss": 1.1568883657455444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41696596145629883, "epoch": 8.23, "learning_rate": 9.819667511975204e-06, "loss": 0.4146, "step": 9739, "task_loss": 1.722784161567688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9170864820480347, "epoch": 8.23, "learning_rate": 9.814971353432892e-06, "loss": 0.6393, "step": 9740, "task_loss": 1.517313003540039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5607746839523315, "epoch": 8.23, "learning_rate": 9.81027519489058e-06, "loss": 0.6367, "step": 9741, "task_loss": 0.633549153804779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5163124203681946, "epoch": 8.23, "learning_rate": 9.805579036348266e-06, "loss": 0.4495, "step": 9742, "task_loss": 0.9615167379379272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6394141912460327, "epoch": 8.24, "learning_rate": 9.800882877805956e-06, "loss": 0.4963, "step": 9743, "task_loss": 1.116018295288086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3375459313392639, "epoch": 8.24, "learning_rate": 9.796186719263643e-06, "loss": 0.4894, "step": 9744, "task_loss": 0.28979170322418213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5211962461471558, "epoch": 8.24, "learning_rate": 9.79149056072133e-06, "loss": 0.4494, "step": 9745, "task_loss": 1.1818859577178955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5228438377380371, "epoch": 8.24, "learning_rate": 9.786794402179019e-06, "loss": 0.5962, "step": 9746, "task_loss": 0.5587165355682373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9166895747184753, "epoch": 8.24, "learning_rate": 9.782098243636707e-06, "loss": 0.6407, "step": 9747, "task_loss": 0.9323499798774719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6798045635223389, "epoch": 8.24, "learning_rate": 9.777402085094393e-06, "loss": 0.5301, "step": 9748, "task_loss": 1.1629339456558228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2392926812171936, "epoch": 8.24, "learning_rate": 9.772705926552081e-06, "loss": 0.4703, "step": 9749, "task_loss": 0.6324536800384521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43994563817977905, "epoch": 8.24, "learning_rate": 9.768009768009769e-06, "loss": 0.456, "step": 9750, "task_loss": 0.49706903100013733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7980172634124756, "epoch": 8.24, "learning_rate": 9.763313609467455e-06, "loss": 0.6646, "step": 9751, "task_loss": 1.5138858556747437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3976016938686371, "epoch": 8.24, "learning_rate": 9.758617450925143e-06, "loss": 0.4915, "step": 9752, "task_loss": 0.5660594701766968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7122954726219177, "epoch": 8.24, "learning_rate": 9.753921292382831e-06, "loss": 0.5647, "step": 9753, "task_loss": 0.4660203754901886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3564930558204651, "epoch": 8.24, "learning_rate": 9.749225133840518e-06, "loss": 0.5396, "step": 9754, "task_loss": 0.45521149039268494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43189650774002075, "epoch": 8.25, "learning_rate": 9.744528975298206e-06, "loss": 0.4873, "step": 9755, "task_loss": 1.3142577409744263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6819413900375366, "epoch": 8.25, "learning_rate": 9.739832816755894e-06, "loss": 0.545, "step": 9756, "task_loss": 0.5768380761146545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.441531777381897, "epoch": 8.25, "learning_rate": 9.735136658213582e-06, "loss": 0.5128, "step": 9757, "task_loss": 1.1548147201538086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4253453016281128, "epoch": 8.25, "learning_rate": 9.73044049967127e-06, "loss": 0.4571, "step": 9758, "task_loss": 0.9202203154563904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4309545159339905, "epoch": 8.25, "learning_rate": 9.725744341128958e-06, "loss": 0.4057, "step": 9759, "task_loss": 0.2730085849761963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.22604672610759735, "epoch": 8.25, "learning_rate": 9.721048182586644e-06, "loss": 0.3544, "step": 9760, "task_loss": 0.25521770119667053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6673523187637329, "epoch": 8.25, "learning_rate": 9.716352024044332e-06, "loss": 0.5784, "step": 9761, "task_loss": 1.2078791856765747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6351190805435181, "epoch": 8.25, "learning_rate": 9.71165586550202e-06, "loss": 0.6503, "step": 9762, "task_loss": 0.5690380334854126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43066397309303284, "epoch": 8.25, "learning_rate": 9.706959706959708e-06, "loss": 0.5194, "step": 9763, "task_loss": 0.6990838646888733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39844387769699097, "epoch": 8.25, "learning_rate": 9.702263548417395e-06, "loss": 0.7076, "step": 9764, "task_loss": 0.44923555850982666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42206496000289917, "epoch": 8.25, "learning_rate": 9.697567389875083e-06, "loss": 0.4525, "step": 9765, "task_loss": 0.6137364506721497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4665870666503906, "epoch": 8.26, "learning_rate": 9.69287123133277e-06, "loss": 0.653, "step": 9766, "task_loss": 0.6588395237922668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30985769629478455, "epoch": 8.26, "learning_rate": 9.688175072790457e-06, "loss": 0.61, "step": 9767, "task_loss": 0.05884511396288872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6152005195617676, "epoch": 8.26, "learning_rate": 9.683478914248145e-06, "loss": 0.6039, "step": 9768, "task_loss": 0.5804699659347534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5957076549530029, "epoch": 8.26, "learning_rate": 9.678782755705833e-06, "loss": 0.6496, "step": 9769, "task_loss": 0.48034799098968506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5379688739776611, "epoch": 8.26, "learning_rate": 9.67408659716352e-06, "loss": 0.6202, "step": 9770, "task_loss": 0.6915296912193298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6712318062782288, "epoch": 8.26, "learning_rate": 9.669390438621208e-06, "loss": 0.6672, "step": 9771, "task_loss": 0.8606024980545044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6139724254608154, "epoch": 8.26, "learning_rate": 9.664694280078896e-06, "loss": 0.657, "step": 9772, "task_loss": 0.6612762212753296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5048691630363464, "epoch": 8.26, "learning_rate": 9.659998121536584e-06, "loss": 0.5206, "step": 9773, "task_loss": 0.21492275595664978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5683215856552124, "epoch": 8.26, "learning_rate": 9.655301962994272e-06, "loss": 0.5532, "step": 9774, "task_loss": 0.5469168424606323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3071179986000061, "epoch": 8.26, "learning_rate": 9.65060580445196e-06, "loss": 0.526, "step": 9775, "task_loss": 0.567359209060669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7208547592163086, "epoch": 8.26, "learning_rate": 9.645909645909646e-06, "loss": 0.5371, "step": 9776, "task_loss": 0.5636171698570251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37827035784721375, "epoch": 8.26, "learning_rate": 9.641213487367334e-06, "loss": 0.5241, "step": 9777, "task_loss": 0.507062554359436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40310561656951904, "epoch": 8.27, "learning_rate": 9.636517328825022e-06, "loss": 0.5029, "step": 9778, "task_loss": 0.6104427576065063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5208982825279236, "epoch": 8.27, "learning_rate": 9.63182117028271e-06, "loss": 0.5529, "step": 9779, "task_loss": 0.7730090618133545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20143499970436096, "epoch": 8.27, "learning_rate": 9.627125011740396e-06, "loss": 0.4856, "step": 9780, "task_loss": 0.09783617407083511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.542900025844574, "epoch": 8.27, "learning_rate": 9.622428853198085e-06, "loss": 0.5476, "step": 9781, "task_loss": 0.40474212169647217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3988785147666931, "epoch": 8.27, "learning_rate": 9.617732694655773e-06, "loss": 0.5852, "step": 9782, "task_loss": 0.5491964221000671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4506411552429199, "epoch": 8.27, "learning_rate": 9.613036536113459e-06, "loss": 0.5498, "step": 9783, "task_loss": 1.1506603956222534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8075918555259705, "epoch": 8.27, "learning_rate": 9.608340377571147e-06, "loss": 0.5485, "step": 9784, "task_loss": 1.2915458679199219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41317683458328247, "epoch": 8.27, "learning_rate": 9.603644219028835e-06, "loss": 0.5755, "step": 9785, "task_loss": 1.0727864503860474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5664294362068176, "epoch": 8.27, "learning_rate": 9.598948060486521e-06, "loss": 0.6346, "step": 9786, "task_loss": 0.694263219833374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6403162479400635, "epoch": 8.27, "learning_rate": 9.59425190194421e-06, "loss": 0.57, "step": 9787, "task_loss": 0.7939872741699219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6411526203155518, "epoch": 8.27, "learning_rate": 9.589555743401897e-06, "loss": 0.5314, "step": 9788, "task_loss": 0.6053494215011597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5246749520301819, "epoch": 8.27, "learning_rate": 9.584859584859585e-06, "loss": 0.625, "step": 9789, "task_loss": 0.8966299891471863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4940210282802582, "epoch": 8.28, "learning_rate": 9.580163426317273e-06, "loss": 0.5315, "step": 9790, "task_loss": 0.8179449439048767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43058985471725464, "epoch": 8.28, "learning_rate": 9.575467267774961e-06, "loss": 0.6147, "step": 9791, "task_loss": 1.3632245063781738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7816107273101807, "epoch": 8.28, "learning_rate": 9.570771109232648e-06, "loss": 0.5868, "step": 9792, "task_loss": 2.1742801666259766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7177138328552246, "epoch": 8.28, "learning_rate": 9.566074950690336e-06, "loss": 0.5191, "step": 9793, "task_loss": 0.9191970229148865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4530789256095886, "epoch": 8.28, "learning_rate": 9.561378792148024e-06, "loss": 0.5311, "step": 9794, "task_loss": 0.6190091967582703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5401563048362732, "epoch": 8.28, "learning_rate": 9.556682633605712e-06, "loss": 0.4929, "step": 9795, "task_loss": 1.370422124862671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7444018125534058, "epoch": 8.28, "learning_rate": 9.551986475063398e-06, "loss": 0.7258, "step": 9796, "task_loss": 0.8194546699523926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6824308633804321, "epoch": 8.28, "learning_rate": 9.547290316521086e-06, "loss": 0.618, "step": 9797, "task_loss": 0.608131468296051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42822542786598206, "epoch": 8.28, "learning_rate": 9.542594157978774e-06, "loss": 0.3862, "step": 9798, "task_loss": 0.035425007343292236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6065788865089417, "epoch": 8.28, "learning_rate": 9.53789799943646e-06, "loss": 0.5817, "step": 9799, "task_loss": 0.6924965381622314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1237983703613281, "epoch": 8.28, "learning_rate": 9.533201840894149e-06, "loss": 0.5957, "step": 9800, "task_loss": 1.1971031427383423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.433228075504303, "epoch": 8.28, "learning_rate": 9.528505682351837e-06, "loss": 0.5634, "step": 9801, "task_loss": 0.8149638175964355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4285328984260559, "epoch": 8.29, "learning_rate": 9.523809523809523e-06, "loss": 0.5891, "step": 9802, "task_loss": 0.6977156400680542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36459070444107056, "epoch": 8.29, "learning_rate": 9.519113365267211e-06, "loss": 0.597, "step": 9803, "task_loss": 0.12011934816837311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4644562005996704, "epoch": 8.29, "learning_rate": 9.5144172067249e-06, "loss": 0.4944, "step": 9804, "task_loss": 0.7115161418914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48303931951522827, "epoch": 8.29, "learning_rate": 9.509721048182587e-06, "loss": 0.5961, "step": 9805, "task_loss": 0.46976321935653687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.501556932926178, "epoch": 8.29, "learning_rate": 9.505024889640275e-06, "loss": 0.4283, "step": 9806, "task_loss": 1.046364426612854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3391198217868805, "epoch": 8.29, "learning_rate": 9.500328731097963e-06, "loss": 0.606, "step": 9807, "task_loss": 0.6678954362869263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49160265922546387, "epoch": 8.29, "learning_rate": 9.49563257255565e-06, "loss": 0.4675, "step": 9808, "task_loss": 0.5005508661270142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6155393123626709, "epoch": 8.29, "learning_rate": 9.490936414013338e-06, "loss": 0.7818, "step": 9809, "task_loss": 1.612362027168274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.907633900642395, "epoch": 8.29, "learning_rate": 9.486240255471026e-06, "loss": 0.5496, "step": 9810, "task_loss": 0.9222021698951721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3393726944923401, "epoch": 8.29, "learning_rate": 9.481544096928712e-06, "loss": 0.4271, "step": 9811, "task_loss": 0.40613651275634766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8157544136047363, "epoch": 8.29, "learning_rate": 9.4768479383864e-06, "loss": 0.6059, "step": 9812, "task_loss": 0.8594703078269958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33560436964035034, "epoch": 8.29, "learning_rate": 9.472151779844088e-06, "loss": 0.4758, "step": 9813, "task_loss": 0.21717692911624908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5319986343383789, "epoch": 8.3, "learning_rate": 9.467455621301776e-06, "loss": 0.5968, "step": 9814, "task_loss": 0.8050386309623718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4662114977836609, "epoch": 8.3, "learning_rate": 9.462759462759462e-06, "loss": 0.5715, "step": 9815, "task_loss": 0.27863043546676636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6997028589248657, "epoch": 8.3, "learning_rate": 9.45806330421715e-06, "loss": 0.5676, "step": 9816, "task_loss": 1.0485244989395142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.737250804901123, "epoch": 8.3, "learning_rate": 9.453367145674838e-06, "loss": 0.672, "step": 9817, "task_loss": 0.6227298378944397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21577823162078857, "epoch": 8.3, "learning_rate": 9.448670987132525e-06, "loss": 0.5153, "step": 9818, "task_loss": 1.2230491638183594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4244789481163025, "epoch": 8.3, "learning_rate": 9.443974828590213e-06, "loss": 0.589, "step": 9819, "task_loss": 0.3889341950416565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.22982212901115417, "epoch": 8.3, "learning_rate": 9.439278670047903e-06, "loss": 0.5813, "step": 9820, "task_loss": 0.738448977470398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8293799757957458, "epoch": 8.3, "learning_rate": 9.434582511505589e-06, "loss": 0.6354, "step": 9821, "task_loss": 1.038421869277954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6998562216758728, "epoch": 8.3, "learning_rate": 9.429886352963277e-06, "loss": 0.6926, "step": 9822, "task_loss": 1.4274076223373413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49759945273399353, "epoch": 8.3, "learning_rate": 9.425190194420965e-06, "loss": 0.4667, "step": 9823, "task_loss": 0.6074180006980896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5578672885894775, "epoch": 8.3, "learning_rate": 9.420494035878651e-06, "loss": 0.4985, "step": 9824, "task_loss": 0.5766232013702393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43104806542396545, "epoch": 8.3, "learning_rate": 9.41579787733634e-06, "loss": 0.5109, "step": 9825, "task_loss": 0.6721722483634949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.700416624546051, "epoch": 8.31, "learning_rate": 9.411101718794027e-06, "loss": 0.5728, "step": 9826, "task_loss": 0.5904430150985718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5497042536735535, "epoch": 8.31, "learning_rate": 9.406405560251714e-06, "loss": 0.4527, "step": 9827, "task_loss": 0.1918277144432068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6594712734222412, "epoch": 8.31, "learning_rate": 9.401709401709402e-06, "loss": 0.7735, "step": 9828, "task_loss": 1.0352098941802979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41223740577697754, "epoch": 8.31, "learning_rate": 9.39701324316709e-06, "loss": 0.4859, "step": 9829, "task_loss": 0.6093012094497681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5157281160354614, "epoch": 8.31, "learning_rate": 9.392317084624778e-06, "loss": 0.4954, "step": 9830, "task_loss": 0.7781351208686829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3514488637447357, "epoch": 8.31, "learning_rate": 9.387620926082464e-06, "loss": 0.5409, "step": 9831, "task_loss": 0.5908054709434509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42079484462738037, "epoch": 8.31, "learning_rate": 9.382924767540152e-06, "loss": 0.3872, "step": 9832, "task_loss": 0.7526048421859741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7556208372116089, "epoch": 8.31, "learning_rate": 9.37822860899784e-06, "loss": 0.4906, "step": 9833, "task_loss": 0.6052581667900085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5553171038627625, "epoch": 8.31, "learning_rate": 9.373532450455527e-06, "loss": 0.6166, "step": 9834, "task_loss": 0.4010315537452698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42976170778274536, "epoch": 8.31, "learning_rate": 9.368836291913216e-06, "loss": 0.4403, "step": 9835, "task_loss": 0.13359814882278442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5904964208602905, "epoch": 8.31, "learning_rate": 9.364140133370904e-06, "loss": 0.6355, "step": 9836, "task_loss": 0.6333041787147522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7922317981719971, "epoch": 8.32, "learning_rate": 9.35944397482859e-06, "loss": 0.6821, "step": 9837, "task_loss": 0.9209162592887878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48530662059783936, "epoch": 8.32, "learning_rate": 9.354747816286279e-06, "loss": 0.5328, "step": 9838, "task_loss": 0.7902927398681641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5380594730377197, "epoch": 8.32, "learning_rate": 9.350051657743967e-06, "loss": 0.5983, "step": 9839, "task_loss": 0.55659419298172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5057859420776367, "epoch": 8.32, "learning_rate": 9.345355499201653e-06, "loss": 0.5519, "step": 9840, "task_loss": 0.792490541934967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4331507980823517, "epoch": 8.32, "learning_rate": 9.340659340659341e-06, "loss": 0.3779, "step": 9841, "task_loss": 0.6585694551467896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2747732996940613, "epoch": 8.32, "learning_rate": 9.33596318211703e-06, "loss": 0.4348, "step": 9842, "task_loss": 0.41783568263053894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6122369170188904, "epoch": 8.32, "learning_rate": 9.331267023574715e-06, "loss": 0.7642, "step": 9843, "task_loss": 0.8616459965705872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48968708515167236, "epoch": 8.32, "learning_rate": 9.326570865032404e-06, "loss": 0.5997, "step": 9844, "task_loss": 0.3671579957008362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.342782199382782, "epoch": 8.32, "learning_rate": 9.321874706490092e-06, "loss": 0.6366, "step": 9845, "task_loss": 0.44838225841522217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5338980555534363, "epoch": 8.32, "learning_rate": 9.31717854794778e-06, "loss": 0.5121, "step": 9846, "task_loss": 1.0960882902145386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5870649814605713, "epoch": 8.32, "learning_rate": 9.312482389405466e-06, "loss": 0.5842, "step": 9847, "task_loss": 0.2834669053554535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48407459259033203, "epoch": 8.32, "learning_rate": 9.307786230863154e-06, "loss": 0.4343, "step": 9848, "task_loss": 0.5650699138641357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1554495096206665, "epoch": 8.33, "learning_rate": 9.303090072320842e-06, "loss": 0.6189, "step": 9849, "task_loss": 1.185685396194458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7156877517700195, "epoch": 8.33, "learning_rate": 9.298393913778528e-06, "loss": 0.5343, "step": 9850, "task_loss": 0.9846588969230652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5293791890144348, "epoch": 8.33, "learning_rate": 9.293697755236218e-06, "loss": 0.5355, "step": 9851, "task_loss": 0.5533600449562073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5256255269050598, "epoch": 8.33, "learning_rate": 9.289001596693906e-06, "loss": 0.521, "step": 9852, "task_loss": 0.41682523488998413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.898876428604126, "epoch": 8.33, "learning_rate": 9.284305438151592e-06, "loss": 0.7611, "step": 9853, "task_loss": 0.8341476917266846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9197369813919067, "epoch": 8.33, "learning_rate": 9.27960927960928e-06, "loss": 0.5908, "step": 9854, "task_loss": 1.0809704065322876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6401734948158264, "epoch": 8.33, "learning_rate": 9.274913121066969e-06, "loss": 0.5565, "step": 9855, "task_loss": 0.6759198904037476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35111457109451294, "epoch": 8.33, "learning_rate": 9.270216962524655e-06, "loss": 0.4862, "step": 9856, "task_loss": 0.1634945273399353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1975913047790527, "epoch": 8.33, "learning_rate": 9.265520803982343e-06, "loss": 0.6752, "step": 9857, "task_loss": 0.7135130167007446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34046322107315063, "epoch": 8.33, "learning_rate": 9.260824645440031e-06, "loss": 0.4531, "step": 9858, "task_loss": 0.5800590515136719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7673255801200867, "epoch": 8.33, "learning_rate": 9.256128486897717e-06, "loss": 0.6002, "step": 9859, "task_loss": 0.5264628529548645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44512617588043213, "epoch": 8.33, "learning_rate": 9.251432328355405e-06, "loss": 0.6741, "step": 9860, "task_loss": 1.0864309072494507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3958045244216919, "epoch": 8.34, "learning_rate": 9.246736169813093e-06, "loss": 0.5455, "step": 9861, "task_loss": 0.26514023542404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4044870138168335, "epoch": 8.34, "learning_rate": 9.242040011270781e-06, "loss": 0.6991, "step": 9862, "task_loss": 0.7736174464225769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45432838797569275, "epoch": 8.34, "learning_rate": 9.237343852728468e-06, "loss": 0.4905, "step": 9863, "task_loss": 0.6095198392868042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5033773183822632, "epoch": 8.34, "learning_rate": 9.232647694186156e-06, "loss": 0.533, "step": 9864, "task_loss": 0.3036006689071655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29851672053337097, "epoch": 8.34, "learning_rate": 9.227951535643844e-06, "loss": 0.3976, "step": 9865, "task_loss": 0.2025485783815384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8515947461128235, "epoch": 8.34, "learning_rate": 9.223255377101532e-06, "loss": 0.7501, "step": 9866, "task_loss": 0.9389605522155762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44136229157447815, "epoch": 8.34, "learning_rate": 9.21855921855922e-06, "loss": 0.5672, "step": 9867, "task_loss": 0.2064133584499359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47419092059135437, "epoch": 8.34, "learning_rate": 9.213863060016908e-06, "loss": 0.4892, "step": 9868, "task_loss": 0.4505825936794281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3481079339981079, "epoch": 8.34, "learning_rate": 9.209166901474594e-06, "loss": 0.404, "step": 9869, "task_loss": 0.10067126899957657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4792461395263672, "epoch": 8.34, "learning_rate": 9.204470742932282e-06, "loss": 0.5252, "step": 9870, "task_loss": 0.5779575705528259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4125569462776184, "epoch": 8.34, "learning_rate": 9.19977458438997e-06, "loss": 0.3425, "step": 9871, "task_loss": 0.7925093173980713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7406113147735596, "epoch": 8.34, "learning_rate": 9.195078425847657e-06, "loss": 0.6271, "step": 9872, "task_loss": 0.8593380451202393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5144037008285522, "epoch": 8.35, "learning_rate": 9.190382267305345e-06, "loss": 0.5578, "step": 9873, "task_loss": 0.868512749671936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4268445670604706, "epoch": 8.35, "learning_rate": 9.185686108763033e-06, "loss": 0.711, "step": 9874, "task_loss": 0.5306088924407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.605656087398529, "epoch": 8.35, "learning_rate": 9.180989950220719e-06, "loss": 0.5597, "step": 9875, "task_loss": 1.57271146774292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4124353528022766, "epoch": 8.35, "learning_rate": 9.176293791678407e-06, "loss": 0.393, "step": 9876, "task_loss": 0.07403106242418289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2852230370044708, "epoch": 8.35, "learning_rate": 9.171597633136095e-06, "loss": 0.519, "step": 9877, "task_loss": 0.45750540494918823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48764568567276, "epoch": 8.35, "learning_rate": 9.166901474593783e-06, "loss": 0.526, "step": 9878, "task_loss": 0.6741166710853577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37947091460227966, "epoch": 8.35, "learning_rate": 9.16220531605147e-06, "loss": 0.356, "step": 9879, "task_loss": 0.17387795448303223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2866538166999817, "epoch": 8.35, "learning_rate": 9.157509157509158e-06, "loss": 0.5924, "step": 9880, "task_loss": 0.285640686750412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5311758518218994, "epoch": 8.35, "learning_rate": 9.152812998966846e-06, "loss": 0.5765, "step": 9881, "task_loss": 0.3697514832019806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9276049733161926, "epoch": 8.35, "learning_rate": 9.148116840424534e-06, "loss": 0.6718, "step": 9882, "task_loss": 0.826274037361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7386634349822998, "epoch": 8.35, "learning_rate": 9.143420681882222e-06, "loss": 0.5698, "step": 9883, "task_loss": 0.8295484781265259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8462631702423096, "epoch": 8.35, "learning_rate": 9.13872452333991e-06, "loss": 0.7331, "step": 9884, "task_loss": 1.050635576248169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46749889850616455, "epoch": 8.36, "learning_rate": 9.134028364797596e-06, "loss": 0.5523, "step": 9885, "task_loss": 0.613349437713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4805455505847931, "epoch": 8.36, "learning_rate": 9.129332206255284e-06, "loss": 0.5041, "step": 9886, "task_loss": 0.4506632089614868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8208001852035522, "epoch": 8.36, "learning_rate": 9.124636047712972e-06, "loss": 0.5504, "step": 9887, "task_loss": 1.1708484888076782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6454461216926575, "epoch": 8.36, "learning_rate": 9.119939889170658e-06, "loss": 0.6527, "step": 9888, "task_loss": 1.125724196434021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8337910175323486, "epoch": 8.36, "learning_rate": 9.115243730628346e-06, "loss": 0.7518, "step": 9889, "task_loss": 0.31285059452056885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3674655854701996, "epoch": 8.36, "learning_rate": 9.110547572086034e-06, "loss": 0.5141, "step": 9890, "task_loss": 0.2200472205877304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4796905517578125, "epoch": 8.36, "learning_rate": 9.10585141354372e-06, "loss": 0.414, "step": 9891, "task_loss": 0.30466580390930176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7767717242240906, "epoch": 8.36, "learning_rate": 9.101155255001409e-06, "loss": 0.5838, "step": 9892, "task_loss": 0.5519978404045105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.557163655757904, "epoch": 8.36, "learning_rate": 9.096459096459097e-06, "loss": 0.5598, "step": 9893, "task_loss": 0.6719943881034851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3806896209716797, "epoch": 8.36, "learning_rate": 9.091762937916783e-06, "loss": 0.4798, "step": 9894, "task_loss": 0.4340420365333557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46470317244529724, "epoch": 8.36, "learning_rate": 9.087066779374471e-06, "loss": 0.4472, "step": 9895, "task_loss": 0.3854176998138428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2771468162536621, "epoch": 8.36, "learning_rate": 9.08237062083216e-06, "loss": 0.4936, "step": 9896, "task_loss": 0.622061550617218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2659100890159607, "epoch": 8.37, "learning_rate": 9.077674462289847e-06, "loss": 0.4477, "step": 9897, "task_loss": 0.8043898940086365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43474987149238586, "epoch": 8.37, "learning_rate": 9.072978303747535e-06, "loss": 0.7161, "step": 9898, "task_loss": 0.5283360481262207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9756755828857422, "epoch": 8.37, "learning_rate": 9.068282145205223e-06, "loss": 0.6548, "step": 9899, "task_loss": 0.49449053406715393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6273257732391357, "epoch": 8.37, "learning_rate": 9.06358598666291e-06, "loss": 0.7109, "step": 9900, "task_loss": 1.0453829765319824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8271505832672119, "epoch": 8.37, "learning_rate": 9.058889828120598e-06, "loss": 0.6645, "step": 9901, "task_loss": 0.996527373790741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7079529762268066, "epoch": 8.37, "learning_rate": 9.054193669578286e-06, "loss": 0.7915, "step": 9902, "task_loss": 0.8270381689071655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33039700984954834, "epoch": 8.37, "learning_rate": 9.049497511035974e-06, "loss": 0.4255, "step": 9903, "task_loss": 0.2217865139245987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6737038493156433, "epoch": 8.37, "learning_rate": 9.04480135249366e-06, "loss": 0.6288, "step": 9904, "task_loss": 0.29218789935112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5450755953788757, "epoch": 8.37, "learning_rate": 9.040105193951348e-06, "loss": 0.4729, "step": 9905, "task_loss": 0.5251120924949646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6233633160591125, "epoch": 8.37, "learning_rate": 9.035409035409036e-06, "loss": 0.6858, "step": 9906, "task_loss": 1.2015820741653442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4151989817619324, "epoch": 8.37, "learning_rate": 9.030712876866723e-06, "loss": 0.4651, "step": 9907, "task_loss": 0.36585453152656555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5143096446990967, "epoch": 8.38, "learning_rate": 9.02601671832441e-06, "loss": 0.4965, "step": 9908, "task_loss": 1.0332845449447632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32539841532707214, "epoch": 8.38, "learning_rate": 9.021320559782099e-06, "loss": 0.4651, "step": 9909, "task_loss": 0.9570243954658508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4106248617172241, "epoch": 8.38, "learning_rate": 9.016624401239785e-06, "loss": 0.4728, "step": 9910, "task_loss": 0.4500006139278412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8526034355163574, "epoch": 8.38, "learning_rate": 9.011928242697473e-06, "loss": 0.5754, "step": 9911, "task_loss": 1.5576403141021729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4953818917274475, "epoch": 8.38, "learning_rate": 9.007232084155163e-06, "loss": 0.4717, "step": 9912, "task_loss": 0.28068995475769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4799301326274872, "epoch": 8.38, "learning_rate": 9.002535925612849e-06, "loss": 0.5014, "step": 9913, "task_loss": 1.0224019289016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6385120153427124, "epoch": 8.38, "learning_rate": 8.997839767070537e-06, "loss": 0.5695, "step": 9914, "task_loss": 0.5912219285964966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28694677352905273, "epoch": 8.38, "learning_rate": 8.993143608528225e-06, "loss": 0.4294, "step": 9915, "task_loss": 0.06467054784297943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31715989112854004, "epoch": 8.38, "learning_rate": 8.988447449985911e-06, "loss": 0.5563, "step": 9916, "task_loss": 0.3906678259372711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34355399012565613, "epoch": 8.38, "learning_rate": 8.9837512914436e-06, "loss": 0.6996, "step": 9917, "task_loss": 0.3040136396884918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3769904375076294, "epoch": 8.38, "learning_rate": 8.979055132901288e-06, "loss": 0.6016, "step": 9918, "task_loss": 0.38139641284942627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6292154788970947, "epoch": 8.38, "learning_rate": 8.974358974358976e-06, "loss": 0.4974, "step": 9919, "task_loss": 1.0779393911361694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6328438520431519, "epoch": 8.39, "learning_rate": 8.969662815816662e-06, "loss": 0.6177, "step": 9920, "task_loss": 1.2412317991256714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2791447639465332, "epoch": 8.39, "learning_rate": 8.96496665727435e-06, "loss": 0.4518, "step": 9921, "task_loss": 0.28696921467781067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32893073558807373, "epoch": 8.39, "learning_rate": 8.960270498732038e-06, "loss": 0.6136, "step": 9922, "task_loss": 0.40786704421043396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45489537715911865, "epoch": 8.39, "learning_rate": 8.955574340189724e-06, "loss": 0.5848, "step": 9923, "task_loss": 0.32128071784973145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.369137704372406, "epoch": 8.39, "learning_rate": 8.950878181647412e-06, "loss": 0.5209, "step": 9924, "task_loss": 0.5043324828147888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0312092304229736, "epoch": 8.39, "learning_rate": 8.9461820231051e-06, "loss": 0.7119, "step": 9925, "task_loss": 0.926745593547821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5292906165122986, "epoch": 8.39, "learning_rate": 8.941485864562787e-06, "loss": 0.4967, "step": 9926, "task_loss": 1.065666913986206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6551141142845154, "epoch": 8.39, "learning_rate": 8.936789706020475e-06, "loss": 0.6435, "step": 9927, "task_loss": 0.5677711963653564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5943963527679443, "epoch": 8.39, "learning_rate": 8.932093547478164e-06, "loss": 0.5556, "step": 9928, "task_loss": 0.8048388361930847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36496707797050476, "epoch": 8.39, "learning_rate": 8.92739738893585e-06, "loss": 0.4799, "step": 9929, "task_loss": 0.06369548290967941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7184954881668091, "epoch": 8.39, "learning_rate": 8.922701230393539e-06, "loss": 0.5923, "step": 9930, "task_loss": 0.5940498113632202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6484407782554626, "epoch": 8.39, "learning_rate": 8.918005071851227e-06, "loss": 0.5376, "step": 9931, "task_loss": 0.536079466342926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4732667803764343, "epoch": 8.4, "learning_rate": 8.913308913308913e-06, "loss": 0.6848, "step": 9932, "task_loss": 0.3019295036792755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5853879451751709, "epoch": 8.4, "learning_rate": 8.908612754766601e-06, "loss": 0.5243, "step": 9933, "task_loss": 0.6658563613891602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3944270610809326, "epoch": 8.4, "learning_rate": 8.90391659622429e-06, "loss": 0.4227, "step": 9934, "task_loss": 0.5648531913757324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.454731822013855, "epoch": 8.4, "learning_rate": 8.899220437681977e-06, "loss": 0.4814, "step": 9935, "task_loss": 0.5632500052452087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35517680644989014, "epoch": 8.4, "learning_rate": 8.894524279139664e-06, "loss": 0.4559, "step": 9936, "task_loss": 0.22559256851673126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3193563222885132, "epoch": 8.4, "learning_rate": 8.889828120597352e-06, "loss": 0.4528, "step": 9937, "task_loss": 0.5373182892799377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5327782034873962, "epoch": 8.4, "learning_rate": 8.88513196205504e-06, "loss": 0.5053, "step": 9938, "task_loss": 0.40290749073028564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41691267490386963, "epoch": 8.4, "learning_rate": 8.880435803512726e-06, "loss": 0.5267, "step": 9939, "task_loss": 0.4602459669113159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5974124670028687, "epoch": 8.4, "learning_rate": 8.875739644970414e-06, "loss": 0.4192, "step": 9940, "task_loss": 0.3035341501235962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5586434602737427, "epoch": 8.4, "learning_rate": 8.871043486428102e-06, "loss": 0.5366, "step": 9941, "task_loss": 0.7637397050857544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3908994793891907, "epoch": 8.4, "learning_rate": 8.866347327885788e-06, "loss": 0.5364, "step": 9942, "task_loss": 0.6628063917160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8916000723838806, "epoch": 8.4, "learning_rate": 8.861651169343478e-06, "loss": 0.6333, "step": 9943, "task_loss": 0.5372015237808228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3201720118522644, "epoch": 8.41, "learning_rate": 8.856955010801166e-06, "loss": 0.5497, "step": 9944, "task_loss": 0.1300886571407318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6689893007278442, "epoch": 8.41, "learning_rate": 8.852258852258853e-06, "loss": 0.591, "step": 9945, "task_loss": 0.42856791615486145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7338372468948364, "epoch": 8.41, "learning_rate": 8.84756269371654e-06, "loss": 0.7889, "step": 9946, "task_loss": 0.6906619668006897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5129604339599609, "epoch": 8.41, "learning_rate": 8.842866535174229e-06, "loss": 0.5455, "step": 9947, "task_loss": 0.9401592016220093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5186076760292053, "epoch": 8.41, "learning_rate": 8.838170376631915e-06, "loss": 0.6367, "step": 9948, "task_loss": 0.40860050916671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6678972244262695, "epoch": 8.41, "learning_rate": 8.833474218089603e-06, "loss": 0.5957, "step": 9949, "task_loss": 2.0507402420043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30523765087127686, "epoch": 8.41, "learning_rate": 8.828778059547291e-06, "loss": 0.5029, "step": 9950, "task_loss": 0.20450808107852936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9576424360275269, "epoch": 8.41, "learning_rate": 8.824081901004979e-06, "loss": 0.6518, "step": 9951, "task_loss": 0.9779417514801025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.702560544013977, "epoch": 8.41, "learning_rate": 8.819385742462665e-06, "loss": 0.5473, "step": 9952, "task_loss": 0.4719312787055969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3083204925060272, "epoch": 8.41, "learning_rate": 8.814689583920353e-06, "loss": 0.3939, "step": 9953, "task_loss": 0.4921172857284546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5613561272621155, "epoch": 8.41, "learning_rate": 8.809993425378042e-06, "loss": 0.6099, "step": 9954, "task_loss": 1.0095082521438599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.25376221537590027, "epoch": 8.41, "learning_rate": 8.805297266835728e-06, "loss": 0.5342, "step": 9955, "task_loss": 0.8700712323188782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6446865797042847, "epoch": 8.42, "learning_rate": 8.800601108293416e-06, "loss": 0.6327, "step": 9956, "task_loss": 1.6571260690689087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5364055633544922, "epoch": 8.42, "learning_rate": 8.795904949751104e-06, "loss": 0.3956, "step": 9957, "task_loss": 0.6053025126457214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3479476273059845, "epoch": 8.42, "learning_rate": 8.791208791208792e-06, "loss": 0.4502, "step": 9958, "task_loss": 0.7703725695610046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6348315477371216, "epoch": 8.42, "learning_rate": 8.78651263266648e-06, "loss": 0.4974, "step": 9959, "task_loss": 0.9038735032081604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29407039284706116, "epoch": 8.42, "learning_rate": 8.781816474124168e-06, "loss": 0.4931, "step": 9960, "task_loss": 0.39095234870910645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45890116691589355, "epoch": 8.42, "learning_rate": 8.777120315581854e-06, "loss": 0.4637, "step": 9961, "task_loss": 0.13498470187187195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5950273275375366, "epoch": 8.42, "learning_rate": 8.772424157039542e-06, "loss": 0.5542, "step": 9962, "task_loss": 0.7343186736106873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49420085549354553, "epoch": 8.42, "learning_rate": 8.76772799849723e-06, "loss": 0.452, "step": 9963, "task_loss": 1.658153772354126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.554752767086029, "epoch": 8.42, "learning_rate": 8.763031839954917e-06, "loss": 0.9073, "step": 9964, "task_loss": 1.1110239028930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34837576746940613, "epoch": 8.42, "learning_rate": 8.758335681412605e-06, "loss": 0.4758, "step": 9965, "task_loss": 0.953647255897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5865325927734375, "epoch": 8.42, "learning_rate": 8.753639522870293e-06, "loss": 0.5595, "step": 9966, "task_loss": 0.5222908854484558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6970024108886719, "epoch": 8.42, "learning_rate": 8.74894336432798e-06, "loss": 0.5961, "step": 9967, "task_loss": 0.577081561088562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7551167011260986, "epoch": 8.43, "learning_rate": 8.744247205785667e-06, "loss": 0.711, "step": 9968, "task_loss": 1.6761959791183472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5481575727462769, "epoch": 8.43, "learning_rate": 8.739551047243355e-06, "loss": 0.4576, "step": 9969, "task_loss": 0.2726275622844696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4967893362045288, "epoch": 8.43, "learning_rate": 8.734854888701043e-06, "loss": 0.5203, "step": 9970, "task_loss": 1.1364524364471436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43314385414123535, "epoch": 8.43, "learning_rate": 8.73015873015873e-06, "loss": 0.418, "step": 9971, "task_loss": 0.1649569720029831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2573692202568054, "epoch": 8.43, "learning_rate": 8.725462571616418e-06, "loss": 0.5147, "step": 9972, "task_loss": 0.41610220074653625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5173876285552979, "epoch": 8.43, "learning_rate": 8.720766413074106e-06, "loss": 0.6037, "step": 9973, "task_loss": 0.972903311252594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7050071358680725, "epoch": 8.43, "learning_rate": 8.716070254531794e-06, "loss": 0.6649, "step": 9974, "task_loss": 1.2041131258010864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6060593724250793, "epoch": 8.43, "learning_rate": 8.711374095989482e-06, "loss": 0.5342, "step": 9975, "task_loss": 0.7018749117851257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8023450374603271, "epoch": 8.43, "learning_rate": 8.70667793744717e-06, "loss": 0.6287, "step": 9976, "task_loss": 0.7074559926986694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7595524787902832, "epoch": 8.43, "learning_rate": 8.701981778904856e-06, "loss": 0.6043, "step": 9977, "task_loss": 0.8795921802520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6877189874649048, "epoch": 8.43, "learning_rate": 8.697285620362544e-06, "loss": 0.4976, "step": 9978, "task_loss": 1.044566035270691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5345941781997681, "epoch": 8.44, "learning_rate": 8.692589461820232e-06, "loss": 0.5833, "step": 9979, "task_loss": 1.9076734781265259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6296684741973877, "epoch": 8.44, "learning_rate": 8.687893303277919e-06, "loss": 0.7033, "step": 9980, "task_loss": 1.15028977394104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47182655334472656, "epoch": 8.44, "learning_rate": 8.683197144735607e-06, "loss": 0.4659, "step": 9981, "task_loss": 0.3365863263607025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7630257606506348, "epoch": 8.44, "learning_rate": 8.678500986193295e-06, "loss": 0.6836, "step": 9982, "task_loss": 1.4018406867980957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34671255946159363, "epoch": 8.44, "learning_rate": 8.673804827650981e-06, "loss": 0.4945, "step": 9983, "task_loss": 0.41963204741477966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5294520258903503, "epoch": 8.44, "learning_rate": 8.669108669108669e-06, "loss": 0.5447, "step": 9984, "task_loss": 0.7812509536743164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3410568833351135, "epoch": 8.44, "learning_rate": 8.664412510566357e-06, "loss": 0.6221, "step": 9985, "task_loss": 0.15880019962787628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48044198751449585, "epoch": 8.44, "learning_rate": 8.659716352024045e-06, "loss": 0.5781, "step": 9986, "task_loss": 0.5167142748832703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7111037969589233, "epoch": 8.44, "learning_rate": 8.655020193481731e-06, "loss": 0.6123, "step": 9987, "task_loss": 1.098706841468811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3397709131240845, "epoch": 8.44, "learning_rate": 8.65032403493942e-06, "loss": 0.5229, "step": 9988, "task_loss": 1.0164237022399902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4307636618614197, "epoch": 8.44, "learning_rate": 8.645627876397107e-06, "loss": 0.4388, "step": 9989, "task_loss": 0.491019070148468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48988622426986694, "epoch": 8.44, "learning_rate": 8.640931717854795e-06, "loss": 0.5522, "step": 9990, "task_loss": 0.7029507756233215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7538944482803345, "epoch": 8.45, "learning_rate": 8.636235559312484e-06, "loss": 0.5785, "step": 9991, "task_loss": 0.6025558114051819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9210150241851807, "epoch": 8.45, "learning_rate": 8.631539400770172e-06, "loss": 0.6655, "step": 9992, "task_loss": 0.9294440150260925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38670963048934937, "epoch": 8.45, "learning_rate": 8.626843242227858e-06, "loss": 0.6077, "step": 9993, "task_loss": 0.13264557719230652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6403752565383911, "epoch": 8.45, "learning_rate": 8.622147083685546e-06, "loss": 0.6419, "step": 9994, "task_loss": 0.5801419615745544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5080069303512573, "epoch": 8.45, "learning_rate": 8.617450925143234e-06, "loss": 0.5144, "step": 9995, "task_loss": 0.6372517347335815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3346964418888092, "epoch": 8.45, "learning_rate": 8.61275476660092e-06, "loss": 0.5864, "step": 9996, "task_loss": 0.38653481006622314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35807663202285767, "epoch": 8.45, "learning_rate": 8.608058608058608e-06, "loss": 0.6161, "step": 9997, "task_loss": 1.2064698934555054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39267003536224365, "epoch": 8.45, "learning_rate": 8.603362449516296e-06, "loss": 0.5331, "step": 9998, "task_loss": 0.6395618319511414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0313843488693237, "epoch": 8.45, "learning_rate": 8.598666290973983e-06, "loss": 0.7156, "step": 9999, "task_loss": 1.5607761144638062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5509908199310303, "epoch": 8.45, "learning_rate": 8.59397013243167e-06, "loss": 0.5621, "step": 10000, "task_loss": 0.5020503997802734 }, { "epoch": 8.45, "eval_accuracy": 0.9020990099009901, "eval_loss": 0.3622073829174042, "eval_runtime": 224.2294, "eval_samples_per_second": 112.608, "eval_steps_per_second": 0.883, "step": 10000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37101680040359497, "epoch": 8.45, "learning_rate": 8.589273973889359e-06, "loss": 0.4186, "step": 10001, "task_loss": 0.7971658110618591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6165363192558289, "epoch": 8.45, "learning_rate": 8.584577815347047e-06, "loss": 0.5619, "step": 10002, "task_loss": 0.46457961201667786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33817583322525024, "epoch": 8.46, "learning_rate": 8.579881656804733e-06, "loss": 0.4634, "step": 10003, "task_loss": 0.46920937299728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.497064471244812, "epoch": 8.46, "learning_rate": 8.575185498262421e-06, "loss": 0.5601, "step": 10004, "task_loss": 0.25871267914772034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5214940309524536, "epoch": 8.46, "learning_rate": 8.57048933972011e-06, "loss": 0.6235, "step": 10005, "task_loss": 0.23004209995269775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44482487440109253, "epoch": 8.46, "learning_rate": 8.565793181177797e-06, "loss": 0.4584, "step": 10006, "task_loss": 0.711748480796814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7691066265106201, "epoch": 8.46, "learning_rate": 8.561097022635485e-06, "loss": 0.6713, "step": 10007, "task_loss": 0.2423478066921234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38161054253578186, "epoch": 8.46, "learning_rate": 8.556400864093173e-06, "loss": 0.4964, "step": 10008, "task_loss": 0.5280135869979858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4427378177642822, "epoch": 8.46, "learning_rate": 8.55170470555086e-06, "loss": 0.4374, "step": 10009, "task_loss": 0.18802271783351898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5038244724273682, "epoch": 8.46, "learning_rate": 8.547008547008548e-06, "loss": 0.5485, "step": 10010, "task_loss": 0.7689385414123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5859662890434265, "epoch": 8.46, "learning_rate": 8.542312388466236e-06, "loss": 0.6468, "step": 10011, "task_loss": 0.6543267965316772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9541316628456116, "epoch": 8.46, "learning_rate": 8.537616229923922e-06, "loss": 0.6541, "step": 10012, "task_loss": 1.2197362184524536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6645013093948364, "epoch": 8.46, "learning_rate": 8.53292007138161e-06, "loss": 0.5451, "step": 10013, "task_loss": 1.0929698944091797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6594057083129883, "epoch": 8.46, "learning_rate": 8.528223912839298e-06, "loss": 0.488, "step": 10014, "task_loss": 1.540675401687622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34254661202430725, "epoch": 8.47, "learning_rate": 8.523527754296984e-06, "loss": 0.671, "step": 10015, "task_loss": 0.5127291679382324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7179667353630066, "epoch": 8.47, "learning_rate": 8.518831595754672e-06, "loss": 0.6149, "step": 10016, "task_loss": 0.3743670880794525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6210370659828186, "epoch": 8.47, "learning_rate": 8.51413543721236e-06, "loss": 0.5735, "step": 10017, "task_loss": 0.5044481754302979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6191599369049072, "epoch": 8.47, "learning_rate": 8.509439278670049e-06, "loss": 0.5588, "step": 10018, "task_loss": 0.6484429836273193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3383939564228058, "epoch": 8.47, "learning_rate": 8.504743120127735e-06, "loss": 0.3805, "step": 10019, "task_loss": 0.36585724353790283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5028634071350098, "epoch": 8.47, "learning_rate": 8.500046961585425e-06, "loss": 0.5486, "step": 10020, "task_loss": 0.10809022933244705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5139552354812622, "epoch": 8.47, "learning_rate": 8.495350803043111e-06, "loss": 0.4256, "step": 10021, "task_loss": 0.22486156225204468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3617074489593506, "epoch": 8.47, "learning_rate": 8.490654644500799e-06, "loss": 0.4657, "step": 10022, "task_loss": 0.6195650696754456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5049826502799988, "epoch": 8.47, "learning_rate": 8.485958485958487e-06, "loss": 0.5307, "step": 10023, "task_loss": 1.04332435131073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5909141302108765, "epoch": 8.47, "learning_rate": 8.481262327416175e-06, "loss": 0.5645, "step": 10024, "task_loss": 0.724266767501831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.611441969871521, "epoch": 8.47, "learning_rate": 8.476566168873861e-06, "loss": 0.5633, "step": 10025, "task_loss": 1.347034215927124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7526014447212219, "epoch": 8.47, "learning_rate": 8.47187001033155e-06, "loss": 0.6715, "step": 10026, "task_loss": 0.3056427538394928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6947050094604492, "epoch": 8.48, "learning_rate": 8.467173851789237e-06, "loss": 0.5971, "step": 10027, "task_loss": 0.8832241296768188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6429755091667175, "epoch": 8.48, "learning_rate": 8.462477693246924e-06, "loss": 0.5133, "step": 10028, "task_loss": 0.8911112546920776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8265464305877686, "epoch": 8.48, "learning_rate": 8.457781534704612e-06, "loss": 0.6694, "step": 10029, "task_loss": 1.0259562730789185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42612266540527344, "epoch": 8.48, "learning_rate": 8.4530853761623e-06, "loss": 0.5401, "step": 10030, "task_loss": 0.6490663886070251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5416198372840881, "epoch": 8.48, "learning_rate": 8.448389217619986e-06, "loss": 0.5281, "step": 10031, "task_loss": 1.2180722951889038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4631580412387848, "epoch": 8.48, "learning_rate": 8.443693059077674e-06, "loss": 0.5796, "step": 10032, "task_loss": 0.771698534488678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4416426718235016, "epoch": 8.48, "learning_rate": 8.438996900535362e-06, "loss": 0.5183, "step": 10033, "task_loss": 0.12403969466686249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2794508934020996, "epoch": 8.48, "learning_rate": 8.43430074199305e-06, "loss": 0.5083, "step": 10034, "task_loss": 0.533685564994812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3129080832004547, "epoch": 8.48, "learning_rate": 8.429604583450738e-06, "loss": 0.483, "step": 10035, "task_loss": 0.3608945906162262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45834997296333313, "epoch": 8.48, "learning_rate": 8.424908424908426e-06, "loss": 0.4611, "step": 10036, "task_loss": 0.7497334480285645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32172897458076477, "epoch": 8.48, "learning_rate": 8.420212266366113e-06, "loss": 0.5187, "step": 10037, "task_loss": 0.779827892780304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8236785531044006, "epoch": 8.48, "learning_rate": 8.4155161078238e-06, "loss": 0.6162, "step": 10038, "task_loss": 0.7641831040382385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6241268515586853, "epoch": 8.49, "learning_rate": 8.410819949281489e-06, "loss": 0.477, "step": 10039, "task_loss": 1.2550685405731201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6671326160430908, "epoch": 8.49, "learning_rate": 8.406123790739177e-06, "loss": 0.6213, "step": 10040, "task_loss": 0.5089460015296936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4064231514930725, "epoch": 8.49, "learning_rate": 8.401427632196863e-06, "loss": 0.5336, "step": 10041, "task_loss": 0.3870660364627838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8995629549026489, "epoch": 8.49, "learning_rate": 8.396731473654551e-06, "loss": 0.6947, "step": 10042, "task_loss": 0.9275874495506287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3483434021472931, "epoch": 8.49, "learning_rate": 8.39203531511224e-06, "loss": 0.633, "step": 10043, "task_loss": 0.8168389201164246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6000936627388, "epoch": 8.49, "learning_rate": 8.387339156569926e-06, "loss": 0.5661, "step": 10044, "task_loss": 0.8069629669189453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6719015836715698, "epoch": 8.49, "learning_rate": 8.382642998027614e-06, "loss": 0.5906, "step": 10045, "task_loss": 0.8274479508399963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6149609088897705, "epoch": 8.49, "learning_rate": 8.377946839485302e-06, "loss": 0.4859, "step": 10046, "task_loss": 1.3549875020980835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5977377891540527, "epoch": 8.49, "learning_rate": 8.373250680942988e-06, "loss": 0.4023, "step": 10047, "task_loss": 0.5117701888084412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.501828670501709, "epoch": 8.49, "learning_rate": 8.368554522400676e-06, "loss": 0.492, "step": 10048, "task_loss": 0.7537463307380676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38984087109565735, "epoch": 8.49, "learning_rate": 8.363858363858364e-06, "loss": 0.5443, "step": 10049, "task_loss": 0.3401387631893158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8699605464935303, "epoch": 8.5, "learning_rate": 8.35916220531605e-06, "loss": 0.6928, "step": 10050, "task_loss": 0.2880379259586334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3873152732849121, "epoch": 8.5, "learning_rate": 8.35446604677374e-06, "loss": 0.4589, "step": 10051, "task_loss": 0.16070134937763214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5625858902931213, "epoch": 8.5, "learning_rate": 8.349769888231428e-06, "loss": 0.6843, "step": 10052, "task_loss": 0.5567559003829956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5363856554031372, "epoch": 8.5, "learning_rate": 8.345073729689114e-06, "loss": 0.6299, "step": 10053, "task_loss": 1.087275505065918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6343284249305725, "epoch": 8.5, "learning_rate": 8.340377571146803e-06, "loss": 0.6333, "step": 10054, "task_loss": 0.4372407793998718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6217770576477051, "epoch": 8.5, "learning_rate": 8.33568141260449e-06, "loss": 0.5343, "step": 10055, "task_loss": 0.2652208209037781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6808849573135376, "epoch": 8.5, "learning_rate": 8.330985254062177e-06, "loss": 0.6112, "step": 10056, "task_loss": 1.0284450054168701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48038631677627563, "epoch": 8.5, "learning_rate": 8.326289095519865e-06, "loss": 0.5902, "step": 10057, "task_loss": 1.4475725889205933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5391125679016113, "epoch": 8.5, "learning_rate": 8.321592936977553e-06, "loss": 0.7815, "step": 10058, "task_loss": 0.9168317317962646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4191219210624695, "epoch": 8.5, "learning_rate": 8.316896778435241e-06, "loss": 0.4846, "step": 10059, "task_loss": 0.8553874492645264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7497739195823669, "epoch": 8.5, "learning_rate": 8.312200619892927e-06, "loss": 0.5398, "step": 10060, "task_loss": 0.244801864027977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5746974349021912, "epoch": 8.5, "learning_rate": 8.307504461350615e-06, "loss": 0.6637, "step": 10061, "task_loss": 1.0055179595947266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.475117951631546, "epoch": 8.51, "learning_rate": 8.302808302808303e-06, "loss": 0.4797, "step": 10062, "task_loss": 0.5751802325248718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3211360573768616, "epoch": 8.51, "learning_rate": 8.29811214426599e-06, "loss": 0.4524, "step": 10063, "task_loss": 0.1122012808918953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4079163670539856, "epoch": 8.51, "learning_rate": 8.293415985723678e-06, "loss": 0.4996, "step": 10064, "task_loss": 0.4432033896446228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.755780816078186, "epoch": 8.51, "learning_rate": 8.288719827181366e-06, "loss": 0.7037, "step": 10065, "task_loss": 1.1345174312591553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3153594136238098, "epoch": 8.51, "learning_rate": 8.284023668639054e-06, "loss": 0.6435, "step": 10066, "task_loss": 0.34938281774520874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5408148169517517, "epoch": 8.51, "learning_rate": 8.279327510096742e-06, "loss": 0.7715, "step": 10067, "task_loss": 0.4785350561141968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2991587817668915, "epoch": 8.51, "learning_rate": 8.27463135155443e-06, "loss": 0.6452, "step": 10068, "task_loss": 0.43624621629714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4055976867675781, "epoch": 8.51, "learning_rate": 8.269935193012116e-06, "loss": 0.5321, "step": 10069, "task_loss": 0.5454167723655701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4091840386390686, "epoch": 8.51, "learning_rate": 8.265239034469804e-06, "loss": 0.3855, "step": 10070, "task_loss": 0.9297168254852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5463947057723999, "epoch": 8.51, "learning_rate": 8.260542875927492e-06, "loss": 0.5343, "step": 10071, "task_loss": 0.6821601986885071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4775974750518799, "epoch": 8.51, "learning_rate": 8.255846717385179e-06, "loss": 0.4175, "step": 10072, "task_loss": 0.8626708388328552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7705935835838318, "epoch": 8.51, "learning_rate": 8.251150558842867e-06, "loss": 0.588, "step": 10073, "task_loss": 0.7717249989509583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6983813047409058, "epoch": 8.52, "learning_rate": 8.246454400300555e-06, "loss": 0.6504, "step": 10074, "task_loss": 0.936234712600708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5273043513298035, "epoch": 8.52, "learning_rate": 8.241758241758243e-06, "loss": 0.5042, "step": 10075, "task_loss": 0.756459653377533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39501944184303284, "epoch": 8.52, "learning_rate": 8.237062083215929e-06, "loss": 0.6299, "step": 10076, "task_loss": 0.834712028503418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.731654167175293, "epoch": 8.52, "learning_rate": 8.232365924673617e-06, "loss": 0.6639, "step": 10077, "task_loss": 0.8432891964912415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40823420882225037, "epoch": 8.52, "learning_rate": 8.227669766131305e-06, "loss": 0.5687, "step": 10078, "task_loss": 0.33937713503837585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7059333324432373, "epoch": 8.52, "learning_rate": 8.222973607588992e-06, "loss": 0.7772, "step": 10079, "task_loss": 0.7310298681259155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6309678554534912, "epoch": 8.52, "learning_rate": 8.21827744904668e-06, "loss": 0.5526, "step": 10080, "task_loss": 1.5185391902923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5286114811897278, "epoch": 8.52, "learning_rate": 8.213581290504368e-06, "loss": 0.577, "step": 10081, "task_loss": 0.3460964858531952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5109664797782898, "epoch": 8.52, "learning_rate": 8.208885131962056e-06, "loss": 0.6165, "step": 10082, "task_loss": 1.0350745916366577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5292120575904846, "epoch": 8.52, "learning_rate": 8.204188973419744e-06, "loss": 0.6772, "step": 10083, "task_loss": 0.8461248874664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6079181432723999, "epoch": 8.52, "learning_rate": 8.199492814877432e-06, "loss": 0.6417, "step": 10084, "task_loss": 0.74967360496521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4560070037841797, "epoch": 8.52, "learning_rate": 8.194796656335118e-06, "loss": 0.5555, "step": 10085, "task_loss": 0.08348466455936432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6852565407752991, "epoch": 8.53, "learning_rate": 8.190100497792806e-06, "loss": 0.6798, "step": 10086, "task_loss": 1.099755048751831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2642732560634613, "epoch": 8.53, "learning_rate": 8.185404339250494e-06, "loss": 0.4978, "step": 10087, "task_loss": 0.642951488494873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4706138074398041, "epoch": 8.53, "learning_rate": 8.18070818070818e-06, "loss": 0.5645, "step": 10088, "task_loss": 0.7805179357528687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3704680800437927, "epoch": 8.53, "learning_rate": 8.176012022165868e-06, "loss": 0.4532, "step": 10089, "task_loss": 1.447042465209961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5216692090034485, "epoch": 8.53, "learning_rate": 8.171315863623556e-06, "loss": 0.6235, "step": 10090, "task_loss": 0.8950048685073853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4785998463630676, "epoch": 8.53, "learning_rate": 8.166619705081245e-06, "loss": 0.578, "step": 10091, "task_loss": 1.077392578125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4186705946922302, "epoch": 8.53, "learning_rate": 8.161923546538931e-06, "loss": 0.5251, "step": 10092, "task_loss": 0.2809906005859375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6139517426490784, "epoch": 8.53, "learning_rate": 8.157227387996619e-06, "loss": 0.4175, "step": 10093, "task_loss": 0.2587749660015106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43012675642967224, "epoch": 8.53, "learning_rate": 8.152531229454307e-06, "loss": 0.4731, "step": 10094, "task_loss": 0.16787084937095642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4010626971721649, "epoch": 8.53, "learning_rate": 8.147835070911993e-06, "loss": 0.5257, "step": 10095, "task_loss": 0.2224569022655487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4598948657512665, "epoch": 8.53, "learning_rate": 8.143138912369681e-06, "loss": 0.5318, "step": 10096, "task_loss": 0.4738600254058838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4947717487812042, "epoch": 8.53, "learning_rate": 8.138442753827371e-06, "loss": 0.5115, "step": 10097, "task_loss": 0.67691570520401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6723331212997437, "epoch": 8.54, "learning_rate": 8.133746595285057e-06, "loss": 0.5173, "step": 10098, "task_loss": 1.2659869194030762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4278014898300171, "epoch": 8.54, "learning_rate": 8.129050436742745e-06, "loss": 0.5007, "step": 10099, "task_loss": 0.6515257358551025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4268699288368225, "epoch": 8.54, "learning_rate": 8.124354278200433e-06, "loss": 0.4909, "step": 10100, "task_loss": 0.18258006870746613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47645384073257446, "epoch": 8.54, "learning_rate": 8.11965811965812e-06, "loss": 0.5067, "step": 10101, "task_loss": 0.4557371139526367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4495584964752197, "epoch": 8.54, "learning_rate": 8.114961961115808e-06, "loss": 0.4616, "step": 10102, "task_loss": 0.6619590520858765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43248888850212097, "epoch": 8.54, "learning_rate": 8.110265802573496e-06, "loss": 0.6752, "step": 10103, "task_loss": 0.5454780459403992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5800113081932068, "epoch": 8.54, "learning_rate": 8.105569644031182e-06, "loss": 0.5987, "step": 10104, "task_loss": 1.0855543613433838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.505826473236084, "epoch": 8.54, "learning_rate": 8.10087348548887e-06, "loss": 0.5913, "step": 10105, "task_loss": 0.7457238435745239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.1813811957836151, "epoch": 8.54, "learning_rate": 8.096177326946558e-06, "loss": 0.5267, "step": 10106, "task_loss": 0.2317916452884674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6119740009307861, "epoch": 8.54, "learning_rate": 8.091481168404246e-06, "loss": 0.6766, "step": 10107, "task_loss": 0.8941870331764221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4819161295890808, "epoch": 8.54, "learning_rate": 8.086785009861933e-06, "loss": 0.4187, "step": 10108, "task_loss": 0.5182462930679321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0491060018539429, "epoch": 8.54, "learning_rate": 8.08208885131962e-06, "loss": 0.7402, "step": 10109, "task_loss": 0.17116479575634003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45087820291519165, "epoch": 8.55, "learning_rate": 8.077392692777309e-06, "loss": 0.5123, "step": 10110, "task_loss": 0.7308047413825989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27497270703315735, "epoch": 8.55, "learning_rate": 8.072696534234995e-06, "loss": 0.4337, "step": 10111, "task_loss": 0.4427964985370636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.656880795955658, "epoch": 8.55, "learning_rate": 8.068000375692685e-06, "loss": 0.5481, "step": 10112, "task_loss": 0.5902817845344543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5193115472793579, "epoch": 8.55, "learning_rate": 8.063304217150373e-06, "loss": 0.4868, "step": 10113, "task_loss": 1.2673028707504272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6995452642440796, "epoch": 8.55, "learning_rate": 8.058608058608059e-06, "loss": 0.5599, "step": 10114, "task_loss": 1.9889076948165894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6898229718208313, "epoch": 8.55, "learning_rate": 8.053911900065747e-06, "loss": 0.5651, "step": 10115, "task_loss": 0.9333986043930054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5352186560630798, "epoch": 8.55, "learning_rate": 8.049215741523435e-06, "loss": 0.7106, "step": 10116, "task_loss": 1.665730595588684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2904912829399109, "epoch": 8.55, "learning_rate": 8.044519582981122e-06, "loss": 0.5067, "step": 10117, "task_loss": 2.2780590057373047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40469786524772644, "epoch": 8.55, "learning_rate": 8.03982342443881e-06, "loss": 0.4868, "step": 10118, "task_loss": 1.0346777439117432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45317691564559937, "epoch": 8.55, "learning_rate": 8.035127265896498e-06, "loss": 0.582, "step": 10119, "task_loss": 0.534895658493042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3065503239631653, "epoch": 8.55, "learning_rate": 8.030431107354184e-06, "loss": 0.4189, "step": 10120, "task_loss": 0.6212037801742554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3278815746307373, "epoch": 8.56, "learning_rate": 8.025734948811872e-06, "loss": 0.5294, "step": 10121, "task_loss": 0.6766582131385803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5146729946136475, "epoch": 8.56, "learning_rate": 8.02103879026956e-06, "loss": 0.4293, "step": 10122, "task_loss": 0.2335144579410553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4684683084487915, "epoch": 8.56, "learning_rate": 8.016342631727246e-06, "loss": 0.5218, "step": 10123, "task_loss": 0.7247099280357361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.458418607711792, "epoch": 8.56, "learning_rate": 8.011646473184934e-06, "loss": 0.4281, "step": 10124, "task_loss": 0.13822361826896667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2294926941394806, "epoch": 8.56, "learning_rate": 8.006950314642622e-06, "loss": 0.4391, "step": 10125, "task_loss": 0.06351035088300705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44035905599594116, "epoch": 8.56, "learning_rate": 8.00225415610031e-06, "loss": 0.5372, "step": 10126, "task_loss": 1.0231698751449585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42991960048675537, "epoch": 8.56, "learning_rate": 7.997557997557997e-06, "loss": 0.6394, "step": 10127, "task_loss": 0.2673608362674713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42540907859802246, "epoch": 8.56, "learning_rate": 7.992861839015687e-06, "loss": 0.5386, "step": 10128, "task_loss": 0.2819347083568573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39422327280044556, "epoch": 8.56, "learning_rate": 7.988165680473373e-06, "loss": 0.4658, "step": 10129, "task_loss": 0.609510600566864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48496150970458984, "epoch": 8.56, "learning_rate": 7.983469521931061e-06, "loss": 0.5354, "step": 10130, "task_loss": 0.4238905906677246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5386257171630859, "epoch": 8.56, "learning_rate": 7.978773363388749e-06, "loss": 0.5238, "step": 10131, "task_loss": 0.5035752654075623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6323145627975464, "epoch": 8.56, "learning_rate": 7.974077204846437e-06, "loss": 0.5482, "step": 10132, "task_loss": 1.656120777130127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6924723982810974, "epoch": 8.57, "learning_rate": 7.969381046304123e-06, "loss": 0.5173, "step": 10133, "task_loss": 0.7235202193260193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5209144353866577, "epoch": 8.57, "learning_rate": 7.964684887761811e-06, "loss": 0.6808, "step": 10134, "task_loss": 0.8030140995979309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5280538201332092, "epoch": 8.57, "learning_rate": 7.9599887292195e-06, "loss": 0.5138, "step": 10135, "task_loss": 0.4552634060382843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6776187419891357, "epoch": 8.57, "learning_rate": 7.955292570677186e-06, "loss": 0.622, "step": 10136, "task_loss": 0.5481429100036621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2642475366592407, "epoch": 8.57, "learning_rate": 7.950596412134874e-06, "loss": 0.5114, "step": 10137, "task_loss": 0.05019498988986015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.591653048992157, "epoch": 8.57, "learning_rate": 7.945900253592562e-06, "loss": 0.5364, "step": 10138, "task_loss": 0.23529018461704254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.499803364276886, "epoch": 8.57, "learning_rate": 7.941204095050248e-06, "loss": 0.4918, "step": 10139, "task_loss": 0.5585193037986755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46978169679641724, "epoch": 8.57, "learning_rate": 7.936507936507936e-06, "loss": 0.6112, "step": 10140, "task_loss": 0.2891625165939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33234351873397827, "epoch": 8.57, "learning_rate": 7.931811777965624e-06, "loss": 0.4567, "step": 10141, "task_loss": 0.41077470779418945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0490368604660034, "epoch": 8.57, "learning_rate": 7.927115619423312e-06, "loss": 0.4487, "step": 10142, "task_loss": 0.7008001804351807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5036971569061279, "epoch": 8.57, "learning_rate": 7.922419460881e-06, "loss": 0.5269, "step": 10143, "task_loss": 0.6286510825157166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45210927724838257, "epoch": 8.57, "learning_rate": 7.917723302338688e-06, "loss": 0.493, "step": 10144, "task_loss": 0.667473554611206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5487678647041321, "epoch": 8.58, "learning_rate": 7.913027143796375e-06, "loss": 0.5175, "step": 10145, "task_loss": 0.32564985752105713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29815828800201416, "epoch": 8.58, "learning_rate": 7.908330985254063e-06, "loss": 0.4553, "step": 10146, "task_loss": 0.30274081230163574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7619922161102295, "epoch": 8.58, "learning_rate": 7.90363482671175e-06, "loss": 0.4888, "step": 10147, "task_loss": 1.0702823400497437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49775928258895874, "epoch": 8.58, "learning_rate": 7.898938668169439e-06, "loss": 0.7797, "step": 10148, "task_loss": 0.8524059057235718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3698400855064392, "epoch": 8.58, "learning_rate": 7.894242509627125e-06, "loss": 0.5478, "step": 10149, "task_loss": 0.8091466426849365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.844707190990448, "epoch": 8.58, "learning_rate": 7.889546351084813e-06, "loss": 0.4837, "step": 10150, "task_loss": 0.8320833444595337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5126816034317017, "epoch": 8.58, "learning_rate": 7.884850192542501e-06, "loss": 0.5943, "step": 10151, "task_loss": 0.23551969230175018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46498727798461914, "epoch": 8.58, "learning_rate": 7.880154034000187e-06, "loss": 0.5025, "step": 10152, "task_loss": 0.4750136137008667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7831730246543884, "epoch": 8.58, "learning_rate": 7.875457875457876e-06, "loss": 0.7706, "step": 10153, "task_loss": 1.1654505729675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43594270944595337, "epoch": 8.58, "learning_rate": 7.870761716915564e-06, "loss": 0.6745, "step": 10154, "task_loss": 0.2119455635547638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6060187816619873, "epoch": 8.58, "learning_rate": 7.86606555837325e-06, "loss": 0.5648, "step": 10155, "task_loss": 0.4748207926750183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5644362568855286, "epoch": 8.58, "learning_rate": 7.861369399830938e-06, "loss": 0.5516, "step": 10156, "task_loss": 0.5103318691253662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5919208526611328, "epoch": 8.59, "learning_rate": 7.856673241288626e-06, "loss": 0.5834, "step": 10157, "task_loss": 0.17614521086215973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3600713014602661, "epoch": 8.59, "learning_rate": 7.851977082746314e-06, "loss": 0.359, "step": 10158, "task_loss": 0.5138518810272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47028249502182007, "epoch": 8.59, "learning_rate": 7.847280924204002e-06, "loss": 0.6003, "step": 10159, "task_loss": 0.5458168387413025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5460010170936584, "epoch": 8.59, "learning_rate": 7.84258476566169e-06, "loss": 0.5595, "step": 10160, "task_loss": 0.6644831299781799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3545892834663391, "epoch": 8.59, "learning_rate": 7.837888607119376e-06, "loss": 0.5289, "step": 10161, "task_loss": 0.3942517936229706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.782636284828186, "epoch": 8.59, "learning_rate": 7.833192448577064e-06, "loss": 0.5763, "step": 10162, "task_loss": 1.3412435054779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5562647581100464, "epoch": 8.59, "learning_rate": 7.828496290034752e-06, "loss": 0.5328, "step": 10163, "task_loss": 1.0432581901550293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.660818338394165, "epoch": 8.59, "learning_rate": 7.82380013149244e-06, "loss": 0.5186, "step": 10164, "task_loss": 0.4902637004852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5361002087593079, "epoch": 8.59, "learning_rate": 7.819103972950127e-06, "loss": 0.5371, "step": 10165, "task_loss": 0.6293748617172241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.23404069244861603, "epoch": 8.59, "learning_rate": 7.814407814407815e-06, "loss": 0.4476, "step": 10166, "task_loss": 0.24538524448871613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49338629841804504, "epoch": 8.59, "learning_rate": 7.809711655865503e-06, "loss": 0.4537, "step": 10167, "task_loss": 0.4014953672885895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4308285415172577, "epoch": 8.59, "learning_rate": 7.80501549732319e-06, "loss": 0.4976, "step": 10168, "task_loss": 0.49279162287712097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5321001410484314, "epoch": 8.6, "learning_rate": 7.800319338780877e-06, "loss": 0.5921, "step": 10169, "task_loss": 0.7921851873397827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7364492416381836, "epoch": 8.6, "learning_rate": 7.795623180238565e-06, "loss": 0.6965, "step": 10170, "task_loss": 1.3062325716018677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7496349811553955, "epoch": 8.6, "learning_rate": 7.790927021696252e-06, "loss": 0.691, "step": 10171, "task_loss": 0.6418201923370361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4535045623779297, "epoch": 8.6, "learning_rate": 7.78623086315394e-06, "loss": 0.5252, "step": 10172, "task_loss": 0.6283706426620483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5378760099411011, "epoch": 8.6, "learning_rate": 7.781534704611628e-06, "loss": 0.6498, "step": 10173, "task_loss": 0.18339066207408905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44475841522216797, "epoch": 8.6, "learning_rate": 7.776838546069316e-06, "loss": 0.5876, "step": 10174, "task_loss": 1.3859630823135376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3444172739982605, "epoch": 8.6, "learning_rate": 7.772142387527004e-06, "loss": 0.482, "step": 10175, "task_loss": 0.22412382066249847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.522566556930542, "epoch": 8.6, "learning_rate": 7.767446228984692e-06, "loss": 0.57, "step": 10176, "task_loss": 0.6650126576423645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6375028491020203, "epoch": 8.6, "learning_rate": 7.762750070442378e-06, "loss": 0.5218, "step": 10177, "task_loss": 0.4516354203224182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38384950160980225, "epoch": 8.6, "learning_rate": 7.758053911900066e-06, "loss": 0.4821, "step": 10178, "task_loss": 0.7780612111091614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5558518171310425, "epoch": 8.6, "learning_rate": 7.753357753357754e-06, "loss": 0.4704, "step": 10179, "task_loss": 0.33721861243247986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2776353061199188, "epoch": 8.6, "learning_rate": 7.748661594815442e-06, "loss": 0.5819, "step": 10180, "task_loss": 0.03408713638782501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7343758344650269, "epoch": 8.61, "learning_rate": 7.743965436273129e-06, "loss": 0.7079, "step": 10181, "task_loss": 0.5480161309242249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5904061794281006, "epoch": 8.61, "learning_rate": 7.739269277730817e-06, "loss": 0.6691, "step": 10182, "task_loss": 0.7121056914329529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3108549118041992, "epoch": 8.61, "learning_rate": 7.734573119188505e-06, "loss": 0.3412, "step": 10183, "task_loss": 0.7224665284156799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43585941195487976, "epoch": 8.61, "learning_rate": 7.729876960646191e-06, "loss": 0.4839, "step": 10184, "task_loss": 0.42834940552711487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5602713823318481, "epoch": 8.61, "learning_rate": 7.725180802103879e-06, "loss": 0.4296, "step": 10185, "task_loss": 0.9487757086753845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4807954430580139, "epoch": 8.61, "learning_rate": 7.720484643561567e-06, "loss": 0.4951, "step": 10186, "task_loss": 0.5736557245254517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4550151228904724, "epoch": 8.61, "learning_rate": 7.715788485019253e-06, "loss": 0.4931, "step": 10187, "task_loss": 0.8452522158622742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34222501516342163, "epoch": 8.61, "learning_rate": 7.711092326476941e-06, "loss": 0.5663, "step": 10188, "task_loss": 0.7781326770782471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4984317719936371, "epoch": 8.61, "learning_rate": 7.70639616793463e-06, "loss": 0.4974, "step": 10189, "task_loss": 0.21855969727039337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5077446699142456, "epoch": 8.61, "learning_rate": 7.701700009392318e-06, "loss": 0.5722, "step": 10190, "task_loss": 1.5036622285842896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6527498960494995, "epoch": 8.61, "learning_rate": 7.697003850850006e-06, "loss": 0.4841, "step": 10191, "task_loss": 0.39531782269477844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24591276049613953, "epoch": 8.61, "learning_rate": 7.692307692307694e-06, "loss": 0.5771, "step": 10192, "task_loss": 0.1834946870803833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.469183087348938, "epoch": 8.62, "learning_rate": 7.68761153376538e-06, "loss": 0.581, "step": 10193, "task_loss": 0.7112513184547424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5913602709770203, "epoch": 8.62, "learning_rate": 7.682915375223068e-06, "loss": 0.6077, "step": 10194, "task_loss": 1.1016547679901123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5052628517150879, "epoch": 8.62, "learning_rate": 7.678219216680756e-06, "loss": 0.5901, "step": 10195, "task_loss": 1.1546002626419067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6496536731719971, "epoch": 8.62, "learning_rate": 7.673523058138444e-06, "loss": 0.8233, "step": 10196, "task_loss": 0.752048671245575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3922309875488281, "epoch": 8.62, "learning_rate": 7.66882689959613e-06, "loss": 0.6097, "step": 10197, "task_loss": 0.1739882379770279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.433386892080307, "epoch": 8.62, "learning_rate": 7.664130741053818e-06, "loss": 0.493, "step": 10198, "task_loss": 0.7850887179374695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45277324318885803, "epoch": 8.62, "learning_rate": 7.659434582511506e-06, "loss": 0.6229, "step": 10199, "task_loss": 0.5415558815002441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.341644823551178, "epoch": 8.62, "learning_rate": 7.654738423969193e-06, "loss": 0.5749, "step": 10200, "task_loss": 0.7519006729125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7530404329299927, "epoch": 8.62, "learning_rate": 7.65004226542688e-06, "loss": 0.6865, "step": 10201, "task_loss": 0.36343511939048767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7121493816375732, "epoch": 8.62, "learning_rate": 7.645346106884569e-06, "loss": 0.5724, "step": 10202, "task_loss": 1.257251262664795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6181690096855164, "epoch": 8.62, "learning_rate": 7.640649948342255e-06, "loss": 0.6541, "step": 10203, "task_loss": 0.9674899578094482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6799599528312683, "epoch": 8.63, "learning_rate": 7.635953789799943e-06, "loss": 0.6037, "step": 10204, "task_loss": 1.2386854887008667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49603235721588135, "epoch": 8.63, "learning_rate": 7.631257631257633e-06, "loss": 0.5624, "step": 10205, "task_loss": 0.6215472221374512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34567350149154663, "epoch": 8.63, "learning_rate": 7.62656147271532e-06, "loss": 0.428, "step": 10206, "task_loss": 1.5922130346298218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3648337721824646, "epoch": 8.63, "learning_rate": 7.621865314173007e-06, "loss": 0.5646, "step": 10207, "task_loss": 0.24857215583324432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5153900980949402, "epoch": 8.63, "learning_rate": 7.6171691556306945e-06, "loss": 0.6577, "step": 10208, "task_loss": 0.8255502581596375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5266650915145874, "epoch": 8.63, "learning_rate": 7.6124729970883825e-06, "loss": 0.4839, "step": 10209, "task_loss": 1.6683098077774048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6347926259040833, "epoch": 8.63, "learning_rate": 7.60777683854607e-06, "loss": 0.555, "step": 10210, "task_loss": 0.9817802309989929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5049170255661011, "epoch": 8.63, "learning_rate": 7.603080680003758e-06, "loss": 0.5385, "step": 10211, "task_loss": 1.364454746246338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3198559284210205, "epoch": 8.63, "learning_rate": 7.598384521461445e-06, "loss": 0.6013, "step": 10212, "task_loss": 0.6473468542098999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49151986837387085, "epoch": 8.63, "learning_rate": 7.593688362919132e-06, "loss": 0.5534, "step": 10213, "task_loss": 1.0181680917739868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6568194031715393, "epoch": 8.63, "learning_rate": 7.58899220437682e-06, "loss": 0.549, "step": 10214, "task_loss": 0.3683798313140869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7576325535774231, "epoch": 8.63, "learning_rate": 7.584296045834507e-06, "loss": 0.7205, "step": 10215, "task_loss": 1.251344919204712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8047572374343872, "epoch": 8.64, "learning_rate": 7.579599887292195e-06, "loss": 0.6131, "step": 10216, "task_loss": 1.0298515558242798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5560303330421448, "epoch": 8.64, "learning_rate": 7.5749037287498826e-06, "loss": 0.585, "step": 10217, "task_loss": 0.36622142791748047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5584666132926941, "epoch": 8.64, "learning_rate": 7.57020757020757e-06, "loss": 0.6502, "step": 10218, "task_loss": 0.9161771535873413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3437217175960541, "epoch": 8.64, "learning_rate": 7.565511411665258e-06, "loss": 0.5173, "step": 10219, "task_loss": 0.7055010199546814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37053439021110535, "epoch": 8.64, "learning_rate": 7.560815253122947e-06, "loss": 0.46, "step": 10220, "task_loss": 0.940696656703949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6715033054351807, "epoch": 8.64, "learning_rate": 7.556119094580634e-06, "loss": 0.5629, "step": 10221, "task_loss": 0.9499757289886475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4253823161125183, "epoch": 8.64, "learning_rate": 7.551422936038322e-06, "loss": 0.5163, "step": 10222, "task_loss": 0.8797752857208252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3481941223144531, "epoch": 8.64, "learning_rate": 7.546726777496009e-06, "loss": 0.4398, "step": 10223, "task_loss": 0.49572840332984924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3747459053993225, "epoch": 8.64, "learning_rate": 7.542030618953696e-06, "loss": 0.4959, "step": 10224, "task_loss": 0.11461915820837021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2797236740589142, "epoch": 8.64, "learning_rate": 7.537334460411384e-06, "loss": 0.5562, "step": 10225, "task_loss": 0.4178621470928192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5512809157371521, "epoch": 8.64, "learning_rate": 7.5326383018690715e-06, "loss": 0.6608, "step": 10226, "task_loss": 0.13302959501743317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.532896876335144, "epoch": 8.64, "learning_rate": 7.5279421433267595e-06, "loss": 0.5344, "step": 10227, "task_loss": 1.275534987449646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.893635630607605, "epoch": 8.65, "learning_rate": 7.523245984784447e-06, "loss": 0.598, "step": 10228, "task_loss": 0.5302479267120361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34316325187683105, "epoch": 8.65, "learning_rate": 7.518549826242134e-06, "loss": 0.528, "step": 10229, "task_loss": 0.659183144569397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.507656455039978, "epoch": 8.65, "learning_rate": 7.513853667699822e-06, "loss": 0.6694, "step": 10230, "task_loss": 0.9280571341514587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3757294714450836, "epoch": 8.65, "learning_rate": 7.509157509157509e-06, "loss": 0.5648, "step": 10231, "task_loss": 0.27533984184265137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46176302433013916, "epoch": 8.65, "learning_rate": 7.504461350615197e-06, "loss": 0.6707, "step": 10232, "task_loss": 0.5796146392822266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9447875618934631, "epoch": 8.65, "learning_rate": 7.499765192072884e-06, "loss": 0.55, "step": 10233, "task_loss": 0.8211705684661865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4340348541736603, "epoch": 8.65, "learning_rate": 7.4950690335305715e-06, "loss": 0.695, "step": 10234, "task_loss": 0.7866904735565186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6139956116676331, "epoch": 8.65, "learning_rate": 7.4903728749882596e-06, "loss": 0.5623, "step": 10235, "task_loss": 0.912427544593811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3792707026004791, "epoch": 8.65, "learning_rate": 7.4856767164459484e-06, "loss": 0.4063, "step": 10236, "task_loss": 1.396048903465271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40041613578796387, "epoch": 8.65, "learning_rate": 7.480980557903636e-06, "loss": 0.5526, "step": 10237, "task_loss": 0.5005154609680176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7138791084289551, "epoch": 8.65, "learning_rate": 7.476284399361324e-06, "loss": 0.6597, "step": 10238, "task_loss": 0.29496675729751587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7154604196548462, "epoch": 8.65, "learning_rate": 7.471588240819011e-06, "loss": 0.5407, "step": 10239, "task_loss": 0.5719923377037048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4428641200065613, "epoch": 8.66, "learning_rate": 7.466892082276698e-06, "loss": 0.633, "step": 10240, "task_loss": 1.585810661315918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31383001804351807, "epoch": 8.66, "learning_rate": 7.462195923734386e-06, "loss": 0.5356, "step": 10241, "task_loss": 0.02257499098777771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.504894495010376, "epoch": 8.66, "learning_rate": 7.457499765192073e-06, "loss": 0.5905, "step": 10242, "task_loss": 0.4012575149536133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5257509350776672, "epoch": 8.66, "learning_rate": 7.4528036066497604e-06, "loss": 0.5328, "step": 10243, "task_loss": 0.30759093165397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0666762590408325, "epoch": 8.66, "learning_rate": 7.4481074481074485e-06, "loss": 0.691, "step": 10244, "task_loss": 1.4708540439605713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.547895610332489, "epoch": 8.66, "learning_rate": 7.443411289565136e-06, "loss": 0.525, "step": 10245, "task_loss": 0.5868865251541138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5265012383460999, "epoch": 8.66, "learning_rate": 7.438715131022824e-06, "loss": 0.6428, "step": 10246, "task_loss": 0.8459095358848572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6326247453689575, "epoch": 8.66, "learning_rate": 7.434018972480511e-06, "loss": 0.6704, "step": 10247, "task_loss": 1.9605354070663452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6322370171546936, "epoch": 8.66, "learning_rate": 7.429322813938198e-06, "loss": 0.5856, "step": 10248, "task_loss": 0.884623646736145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45156383514404297, "epoch": 8.66, "learning_rate": 7.424626655395886e-06, "loss": 0.479, "step": 10249, "task_loss": 0.11320190876722336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.542844295501709, "epoch": 8.66, "learning_rate": 7.419930496853573e-06, "loss": 0.61, "step": 10250, "task_loss": 1.1522495746612549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5021862983703613, "epoch": 8.66, "learning_rate": 7.415234338311262e-06, "loss": 0.6079, "step": 10251, "task_loss": 0.14394943416118622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6764286756515503, "epoch": 8.67, "learning_rate": 7.41053817976895e-06, "loss": 0.6729, "step": 10252, "task_loss": 0.9214707612991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5311066508293152, "epoch": 8.67, "learning_rate": 7.405842021226637e-06, "loss": 0.5519, "step": 10253, "task_loss": 1.117911458015442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3721465468406677, "epoch": 8.67, "learning_rate": 7.4011458626843246e-06, "loss": 0.5198, "step": 10254, "task_loss": 0.5801568031311035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8276471495628357, "epoch": 8.67, "learning_rate": 7.396449704142013e-06, "loss": 0.5267, "step": 10255, "task_loss": 0.9843397736549377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4004414677619934, "epoch": 8.67, "learning_rate": 7.3917535455997e-06, "loss": 0.5987, "step": 10256, "task_loss": 0.7649359703063965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5579057931900024, "epoch": 8.67, "learning_rate": 7.387057387057388e-06, "loss": 0.6129, "step": 10257, "task_loss": 0.6711314916610718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36436760425567627, "epoch": 8.67, "learning_rate": 7.382361228515075e-06, "loss": 0.5883, "step": 10258, "task_loss": 0.45410484075546265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3689746856689453, "epoch": 8.67, "learning_rate": 7.377665069972762e-06, "loss": 0.4424, "step": 10259, "task_loss": 0.3168219029903412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4545149803161621, "epoch": 8.67, "learning_rate": 7.37296891143045e-06, "loss": 0.5085, "step": 10260, "task_loss": 0.5447614192962646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6196643114089966, "epoch": 8.67, "learning_rate": 7.3682727528881374e-06, "loss": 0.5728, "step": 10261, "task_loss": 1.0375136137008667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5339428782463074, "epoch": 8.67, "learning_rate": 7.3635765943458255e-06, "loss": 0.594, "step": 10262, "task_loss": 0.6203529238700867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4673842787742615, "epoch": 8.67, "learning_rate": 7.358880435803513e-06, "loss": 0.4487, "step": 10263, "task_loss": 0.4558497965335846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5213660001754761, "epoch": 8.68, "learning_rate": 7.3541842772612e-06, "loss": 0.4636, "step": 10264, "task_loss": 0.8078634738922119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9265542030334473, "epoch": 8.68, "learning_rate": 7.349488118718888e-06, "loss": 0.6259, "step": 10265, "task_loss": 0.8941127061843872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.742354154586792, "epoch": 8.68, "learning_rate": 7.344791960176575e-06, "loss": 0.6052, "step": 10266, "task_loss": 1.2008435726165771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7910183072090149, "epoch": 8.68, "learning_rate": 7.340095801634264e-06, "loss": 0.5994, "step": 10267, "task_loss": 1.0176353454589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.632259726524353, "epoch": 8.68, "learning_rate": 7.335399643091952e-06, "loss": 0.6011, "step": 10268, "task_loss": 1.8747382164001465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38912177085876465, "epoch": 8.68, "learning_rate": 7.330703484549639e-06, "loss": 0.5212, "step": 10269, "task_loss": 0.27572962641716003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6799410581588745, "epoch": 8.68, "learning_rate": 7.326007326007326e-06, "loss": 0.4436, "step": 10270, "task_loss": 0.8411791324615479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.640162467956543, "epoch": 8.68, "learning_rate": 7.321311167465014e-06, "loss": 0.4807, "step": 10271, "task_loss": 0.9753258228302002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4195259213447571, "epoch": 8.68, "learning_rate": 7.3166150089227016e-06, "loss": 0.3837, "step": 10272, "task_loss": 0.7255974411964417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36342835426330566, "epoch": 8.68, "learning_rate": 7.31191885038039e-06, "loss": 0.5968, "step": 10273, "task_loss": 2.2058541774749756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5180836319923401, "epoch": 8.68, "learning_rate": 7.307222691838077e-06, "loss": 0.4571, "step": 10274, "task_loss": 0.3397982716560364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3544296622276306, "epoch": 8.69, "learning_rate": 7.302526533295764e-06, "loss": 0.595, "step": 10275, "task_loss": 0.334780752658844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8240629434585571, "epoch": 8.69, "learning_rate": 7.297830374753452e-06, "loss": 0.5796, "step": 10276, "task_loss": 0.45293891429901123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5190388560295105, "epoch": 8.69, "learning_rate": 7.293134216211139e-06, "loss": 0.5852, "step": 10277, "task_loss": 0.9565756320953369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.720111608505249, "epoch": 8.69, "learning_rate": 7.288438057668827e-06, "loss": 0.5811, "step": 10278, "task_loss": 1.3596115112304688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.101010799407959, "epoch": 8.69, "learning_rate": 7.283741899126514e-06, "loss": 0.6051, "step": 10279, "task_loss": 0.7380756139755249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7352099418640137, "epoch": 8.69, "learning_rate": 7.279045740584202e-06, "loss": 0.679, "step": 10280, "task_loss": 2.167731523513794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.030396819114685, "epoch": 8.69, "learning_rate": 7.27434958204189e-06, "loss": 0.7476, "step": 10281, "task_loss": 1.1265835762023926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4978786110877991, "epoch": 8.69, "learning_rate": 7.2696534234995785e-06, "loss": 0.5401, "step": 10282, "task_loss": 1.2340333461761475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34344053268432617, "epoch": 8.69, "learning_rate": 7.264957264957266e-06, "loss": 0.587, "step": 10283, "task_loss": 0.4153047502040863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5374748706817627, "epoch": 8.69, "learning_rate": 7.260261106414954e-06, "loss": 0.4491, "step": 10284, "task_loss": 0.39828211069107056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35645395517349243, "epoch": 8.69, "learning_rate": 7.255564947872641e-06, "loss": 0.6791, "step": 10285, "task_loss": 0.6019431948661804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4957481920719147, "epoch": 8.69, "learning_rate": 7.250868789330328e-06, "loss": 0.4297, "step": 10286, "task_loss": 0.15316899120807648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4682653844356537, "epoch": 8.7, "learning_rate": 7.246172630788016e-06, "loss": 0.7787, "step": 10287, "task_loss": 0.3759296238422394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32052621245384216, "epoch": 8.7, "learning_rate": 7.241476472245703e-06, "loss": 0.5505, "step": 10288, "task_loss": 0.9249458909034729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47313565015792847, "epoch": 8.7, "learning_rate": 7.236780313703391e-06, "loss": 0.5996, "step": 10289, "task_loss": 0.5757375955581665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42648565769195557, "epoch": 8.7, "learning_rate": 7.2320841551610785e-06, "loss": 0.5193, "step": 10290, "task_loss": 0.3164008557796478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47710704803466797, "epoch": 8.7, "learning_rate": 7.227387996618766e-06, "loss": 0.4778, "step": 10291, "task_loss": 0.8865930438041687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6771504878997803, "epoch": 8.7, "learning_rate": 7.222691838076454e-06, "loss": 0.745, "step": 10292, "task_loss": 1.3706846237182617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39206287264823914, "epoch": 8.7, "learning_rate": 7.217995679534141e-06, "loss": 0.5499, "step": 10293, "task_loss": 0.497734010219574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5327948927879333, "epoch": 8.7, "learning_rate": 7.213299520991829e-06, "loss": 0.7218, "step": 10294, "task_loss": 0.08416713774204254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2584127187728882, "epoch": 8.7, "learning_rate": 7.208603362449516e-06, "loss": 0.538, "step": 10295, "task_loss": 0.11571971327066422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.25282078981399536, "epoch": 8.7, "learning_rate": 7.203907203907203e-06, "loss": 0.516, "step": 10296, "task_loss": 0.5937641263008118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6640199422836304, "epoch": 8.7, "learning_rate": 7.199211045364892e-06, "loss": 0.8499, "step": 10297, "task_loss": 1.0732173919677734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47376248240470886, "epoch": 8.7, "learning_rate": 7.19451488682258e-06, "loss": 0.4933, "step": 10298, "task_loss": 0.39096471667289734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8174406290054321, "epoch": 8.71, "learning_rate": 7.1898187282802675e-06, "loss": 0.5621, "step": 10299, "task_loss": 0.5781065225601196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35636353492736816, "epoch": 8.71, "learning_rate": 7.1851225697379555e-06, "loss": 0.4709, "step": 10300, "task_loss": 0.6934751272201538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6781492233276367, "epoch": 8.71, "learning_rate": 7.180426411195643e-06, "loss": 0.6317, "step": 10301, "task_loss": 0.4075348377227783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5020151138305664, "epoch": 8.71, "learning_rate": 7.17573025265333e-06, "loss": 0.5835, "step": 10302, "task_loss": 0.10311749577522278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3051629364490509, "epoch": 8.71, "learning_rate": 7.171034094111018e-06, "loss": 0.5279, "step": 10303, "task_loss": 0.48167768120765686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5560952425003052, "epoch": 8.71, "learning_rate": 7.166337935568705e-06, "loss": 0.5012, "step": 10304, "task_loss": 0.8663613200187683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6472745537757874, "epoch": 8.71, "learning_rate": 7.161641777026393e-06, "loss": 0.6519, "step": 10305, "task_loss": 1.6200885772705078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49485307931900024, "epoch": 8.71, "learning_rate": 7.15694561848408e-06, "loss": 0.6764, "step": 10306, "task_loss": 0.9869263172149658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7006239891052246, "epoch": 8.71, "learning_rate": 7.1522494599417675e-06, "loss": 0.6908, "step": 10307, "task_loss": 0.9778549671173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.338640034198761, "epoch": 8.71, "learning_rate": 7.1475533013994555e-06, "loss": 0.6374, "step": 10308, "task_loss": 0.3800492584705353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4954967498779297, "epoch": 8.71, "learning_rate": 7.142857142857143e-06, "loss": 0.5032, "step": 10309, "task_loss": 0.45489731431007385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4592806100845337, "epoch": 8.71, "learning_rate": 7.13816098431483e-06, "loss": 0.4628, "step": 10310, "task_loss": 0.7690982222557068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4532596468925476, "epoch": 8.72, "learning_rate": 7.133464825772518e-06, "loss": 0.5658, "step": 10311, "task_loss": 0.49418187141418457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49163007736206055, "epoch": 8.72, "learning_rate": 7.128768667230205e-06, "loss": 0.5596, "step": 10312, "task_loss": 0.55473393201828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6362015008926392, "epoch": 8.72, "learning_rate": 7.124072508687894e-06, "loss": 0.5176, "step": 10313, "task_loss": 0.6104629039764404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.565541684627533, "epoch": 8.72, "learning_rate": 7.119376350145582e-06, "loss": 0.4306, "step": 10314, "task_loss": 0.23497188091278076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5438326597213745, "epoch": 8.72, "learning_rate": 7.114680191603269e-06, "loss": 0.4289, "step": 10315, "task_loss": 1.1834609508514404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6216580867767334, "epoch": 8.72, "learning_rate": 7.109984033060957e-06, "loss": 0.492, "step": 10316, "task_loss": 0.6886887550354004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3425695598125458, "epoch": 8.72, "learning_rate": 7.1052878745186444e-06, "loss": 0.4648, "step": 10317, "task_loss": 1.1015956401824951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7085755467414856, "epoch": 8.72, "learning_rate": 7.100591715976332e-06, "loss": 0.4928, "step": 10318, "task_loss": 0.8281925916671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4379734694957733, "epoch": 8.72, "learning_rate": 7.09589555743402e-06, "loss": 0.3869, "step": 10319, "task_loss": 0.9389584064483643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37244096398353577, "epoch": 8.72, "learning_rate": 7.091199398891707e-06, "loss": 0.501, "step": 10320, "task_loss": 0.9318284392356873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7777317762374878, "epoch": 8.72, "learning_rate": 7.086503240349394e-06, "loss": 0.6446, "step": 10321, "task_loss": 0.828283429145813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6091635227203369, "epoch": 8.72, "learning_rate": 7.081807081807082e-06, "loss": 0.5479, "step": 10322, "task_loss": 0.2542153596878052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5256115198135376, "epoch": 8.73, "learning_rate": 7.077110923264769e-06, "loss": 0.7371, "step": 10323, "task_loss": 0.5512587428092957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5456048846244812, "epoch": 8.73, "learning_rate": 7.072414764722457e-06, "loss": 0.5457, "step": 10324, "task_loss": 0.6512589454650879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39893728494644165, "epoch": 8.73, "learning_rate": 7.0677186061801445e-06, "loss": 0.4127, "step": 10325, "task_loss": 0.3688058853149414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5130354166030884, "epoch": 8.73, "learning_rate": 7.063022447637832e-06, "loss": 0.6855, "step": 10326, "task_loss": 1.5583701133728027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8827629685401917, "epoch": 8.73, "learning_rate": 7.05832628909552e-06, "loss": 0.5501, "step": 10327, "task_loss": 1.0466549396514893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3581874370574951, "epoch": 8.73, "learning_rate": 7.053630130553209e-06, "loss": 0.5782, "step": 10328, "task_loss": 0.7296577095985413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4200954735279083, "epoch": 8.73, "learning_rate": 7.048933972010896e-06, "loss": 0.6251, "step": 10329, "task_loss": 0.594664990901947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8243305683135986, "epoch": 8.73, "learning_rate": 7.044237813468584e-06, "loss": 0.6002, "step": 10330, "task_loss": 0.6910610198974609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6939981579780579, "epoch": 8.73, "learning_rate": 7.039541654926271e-06, "loss": 0.7009, "step": 10331, "task_loss": 0.5127297043800354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35204148292541504, "epoch": 8.73, "learning_rate": 7.034845496383958e-06, "loss": 0.5283, "step": 10332, "task_loss": 0.8765199184417725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7166299819946289, "epoch": 8.73, "learning_rate": 7.030149337841646e-06, "loss": 0.5458, "step": 10333, "task_loss": 0.36368927359580994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39583003520965576, "epoch": 8.73, "learning_rate": 7.025453179299333e-06, "loss": 0.5668, "step": 10334, "task_loss": 0.3117450177669525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27636227011680603, "epoch": 8.74, "learning_rate": 7.0207570207570214e-06, "loss": 0.4502, "step": 10335, "task_loss": 0.1571434885263443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4637243151664734, "epoch": 8.74, "learning_rate": 7.016060862214709e-06, "loss": 0.6841, "step": 10336, "task_loss": 1.1589305400848389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2447247505187988, "epoch": 8.74, "learning_rate": 7.011364703672396e-06, "loss": 0.6823, "step": 10337, "task_loss": 1.2702417373657227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41441428661346436, "epoch": 8.74, "learning_rate": 7.006668545130084e-06, "loss": 0.456, "step": 10338, "task_loss": 1.0251376628875732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.50485759973526, "epoch": 8.74, "learning_rate": 7.001972386587771e-06, "loss": 0.6294, "step": 10339, "task_loss": 1.8239771127700806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1067254543304443, "epoch": 8.74, "learning_rate": 6.997276228045459e-06, "loss": 0.8008, "step": 10340, "task_loss": 0.9048143029212952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42911794781684875, "epoch": 8.74, "learning_rate": 6.992580069503146e-06, "loss": 0.4752, "step": 10341, "task_loss": 0.7658334374427795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6723777055740356, "epoch": 8.74, "learning_rate": 6.9878839109608334e-06, "loss": 0.5864, "step": 10342, "task_loss": 0.5678532719612122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5371901392936707, "epoch": 8.74, "learning_rate": 6.9831877524185215e-06, "loss": 0.5769, "step": 10343, "task_loss": 0.6378757953643799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6076279878616333, "epoch": 8.74, "learning_rate": 6.97849159387621e-06, "loss": 0.6402, "step": 10344, "task_loss": 0.946560800075531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5277277827262878, "epoch": 8.74, "learning_rate": 6.9737954353338975e-06, "loss": 0.5628, "step": 10345, "task_loss": 0.549729585647583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4803072214126587, "epoch": 8.75, "learning_rate": 6.9690992767915856e-06, "loss": 0.5724, "step": 10346, "task_loss": 1.5084211826324463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31486260890960693, "epoch": 8.75, "learning_rate": 6.964403118249273e-06, "loss": 0.4973, "step": 10347, "task_loss": 0.49438372254371643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6047093272209167, "epoch": 8.75, "learning_rate": 6.95970695970696e-06, "loss": 0.523, "step": 10348, "task_loss": 0.3034510612487793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.52236008644104, "epoch": 8.75, "learning_rate": 6.955010801164648e-06, "loss": 0.6598, "step": 10349, "task_loss": 0.48576614260673523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24308131635189056, "epoch": 8.75, "learning_rate": 6.950314642622335e-06, "loss": 0.5414, "step": 10350, "task_loss": 0.24400851130485535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37431013584136963, "epoch": 8.75, "learning_rate": 6.945618484080023e-06, "loss": 0.6181, "step": 10351, "task_loss": 1.1395189762115479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8452708721160889, "epoch": 8.75, "learning_rate": 6.94092232553771e-06, "loss": 0.5939, "step": 10352, "task_loss": 0.9524499773979187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5957819223403931, "epoch": 8.75, "learning_rate": 6.9362261669953976e-06, "loss": 0.7393, "step": 10353, "task_loss": 1.1419602632522583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5553509593009949, "epoch": 8.75, "learning_rate": 6.931530008453086e-06, "loss": 0.7713, "step": 10354, "task_loss": 0.8318396210670471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8199725151062012, "epoch": 8.75, "learning_rate": 6.926833849910773e-06, "loss": 0.5245, "step": 10355, "task_loss": 1.2660688161849976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6098960041999817, "epoch": 8.75, "learning_rate": 6.922137691368461e-06, "loss": 0.604, "step": 10356, "task_loss": 0.7831256985664368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5590202808380127, "epoch": 8.75, "learning_rate": 6.917441532826148e-06, "loss": 0.6435, "step": 10357, "task_loss": 0.6844828128814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42566007375717163, "epoch": 8.76, "learning_rate": 6.912745374283835e-06, "loss": 0.4853, "step": 10358, "task_loss": 0.3268471658229828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5832395553588867, "epoch": 8.76, "learning_rate": 6.908049215741524e-06, "loss": 0.6413, "step": 10359, "task_loss": 1.6633220911026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7187508344650269, "epoch": 8.76, "learning_rate": 6.903353057199212e-06, "loss": 0.4449, "step": 10360, "task_loss": 0.8923933506011963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37650227546691895, "epoch": 8.76, "learning_rate": 6.898656898656899e-06, "loss": 0.6177, "step": 10361, "task_loss": 0.38793784379959106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6248982548713684, "epoch": 8.76, "learning_rate": 6.893960740114587e-06, "loss": 0.5096, "step": 10362, "task_loss": 0.807874321937561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6305304765701294, "epoch": 8.76, "learning_rate": 6.8892645815722745e-06, "loss": 0.5868, "step": 10363, "task_loss": 0.6161289811134338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2962028384208679, "epoch": 8.76, "learning_rate": 6.884568423029962e-06, "loss": 0.6642, "step": 10364, "task_loss": 0.27984437346458435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4658322334289551, "epoch": 8.76, "learning_rate": 6.87987226448765e-06, "loss": 0.6251, "step": 10365, "task_loss": 0.3167106509208679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4434145390987396, "epoch": 8.76, "learning_rate": 6.875176105945337e-06, "loss": 0.4099, "step": 10366, "task_loss": 0.56837397813797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5962270498275757, "epoch": 8.76, "learning_rate": 6.870479947403025e-06, "loss": 0.5654, "step": 10367, "task_loss": 0.461140513420105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39050614833831787, "epoch": 8.76, "learning_rate": 6.865783788860712e-06, "loss": 0.7108, "step": 10368, "task_loss": 0.20238077640533447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8133956789970398, "epoch": 8.76, "learning_rate": 6.861087630318399e-06, "loss": 0.7233, "step": 10369, "task_loss": 1.0720692873001099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7192848324775696, "epoch": 8.77, "learning_rate": 6.856391471776087e-06, "loss": 0.4309, "step": 10370, "task_loss": 1.691910982131958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32296839356422424, "epoch": 8.77, "learning_rate": 6.8516953132337745e-06, "loss": 0.6315, "step": 10371, "task_loss": 0.2726096510887146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45748406648635864, "epoch": 8.77, "learning_rate": 6.846999154691463e-06, "loss": 0.4278, "step": 10372, "task_loss": 0.29933440685272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7175165414810181, "epoch": 8.77, "learning_rate": 6.84230299614915e-06, "loss": 0.5178, "step": 10373, "task_loss": 0.8377612829208374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36453112959861755, "epoch": 8.77, "learning_rate": 6.837606837606839e-06, "loss": 0.4684, "step": 10374, "task_loss": 0.24953007698059082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46937838196754456, "epoch": 8.77, "learning_rate": 6.832910679064526e-06, "loss": 0.5184, "step": 10375, "task_loss": 0.33284661173820496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.434675395488739, "epoch": 8.77, "learning_rate": 6.828214520522214e-06, "loss": 0.4309, "step": 10376, "task_loss": 0.4021533131599426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3960683345794678, "epoch": 8.77, "learning_rate": 6.823518361979901e-06, "loss": 0.4945, "step": 10377, "task_loss": 0.6038599610328674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3149242699146271, "epoch": 8.77, "learning_rate": 6.818822203437589e-06, "loss": 0.4383, "step": 10378, "task_loss": 0.15443336963653564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3236342668533325, "epoch": 8.77, "learning_rate": 6.814126044895276e-06, "loss": 0.3868, "step": 10379, "task_loss": 0.30196723341941833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3063564598560333, "epoch": 8.77, "learning_rate": 6.8094298863529635e-06, "loss": 0.4642, "step": 10380, "task_loss": 1.2919634580612183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3698621392250061, "epoch": 8.77, "learning_rate": 6.8047337278106515e-06, "loss": 0.4549, "step": 10381, "task_loss": 0.16871774196624756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2864348292350769, "epoch": 8.78, "learning_rate": 6.800037569268339e-06, "loss": 0.3428, "step": 10382, "task_loss": 0.934691846370697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5143797397613525, "epoch": 8.78, "learning_rate": 6.795341410726027e-06, "loss": 0.5683, "step": 10383, "task_loss": 0.9234268069267273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6352388858795166, "epoch": 8.78, "learning_rate": 6.790645252183714e-06, "loss": 0.5725, "step": 10384, "task_loss": 0.27944207191467285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6133922934532166, "epoch": 8.78, "learning_rate": 6.785949093641401e-06, "loss": 0.5151, "step": 10385, "task_loss": 0.4377737045288086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5242425203323364, "epoch": 8.78, "learning_rate": 6.781252935099089e-06, "loss": 0.5335, "step": 10386, "task_loss": 0.46485310792922974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7073420286178589, "epoch": 8.78, "learning_rate": 6.776556776556776e-06, "loss": 0.4525, "step": 10387, "task_loss": 0.7885569334030151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27211177349090576, "epoch": 8.78, "learning_rate": 6.7718606180144635e-06, "loss": 0.3779, "step": 10388, "task_loss": 0.2727503478527069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4776107668876648, "epoch": 8.78, "learning_rate": 6.7671644594721515e-06, "loss": 0.6682, "step": 10389, "task_loss": 1.097060203552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3434045314788818, "epoch": 8.78, "learning_rate": 6.76246830092984e-06, "loss": 0.719, "step": 10390, "task_loss": 0.9614911079406738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3280956745147705, "epoch": 8.78, "learning_rate": 6.757772142387528e-06, "loss": 0.5483, "step": 10391, "task_loss": 0.7056090831756592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44284477829933167, "epoch": 8.78, "learning_rate": 6.753075983845216e-06, "loss": 0.4585, "step": 10392, "task_loss": 0.6883313655853271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4739834666252136, "epoch": 8.78, "learning_rate": 6.748379825302903e-06, "loss": 0.4085, "step": 10393, "task_loss": 0.591938853263855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5046443939208984, "epoch": 8.79, "learning_rate": 6.743683666760591e-06, "loss": 0.563, "step": 10394, "task_loss": 0.6141831278800964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8199476003646851, "epoch": 8.79, "learning_rate": 6.738987508218278e-06, "loss": 0.5046, "step": 10395, "task_loss": 0.4434368312358856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3845810890197754, "epoch": 8.79, "learning_rate": 6.734291349675965e-06, "loss": 0.4384, "step": 10396, "task_loss": 0.30116331577301025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5105284452438354, "epoch": 8.79, "learning_rate": 6.729595191133653e-06, "loss": 0.7532, "step": 10397, "task_loss": 0.2900708317756653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4877791404724121, "epoch": 8.79, "learning_rate": 6.7248990325913404e-06, "loss": 0.4635, "step": 10398, "task_loss": 0.12594617903232574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2686983346939087, "epoch": 8.79, "learning_rate": 6.720202874049028e-06, "loss": 0.5451, "step": 10399, "task_loss": 0.5560266375541687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6871010065078735, "epoch": 8.79, "learning_rate": 6.715506715506716e-06, "loss": 0.6058, "step": 10400, "task_loss": 1.2837603092193604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4755173623561859, "epoch": 8.79, "learning_rate": 6.710810556964403e-06, "loss": 0.4983, "step": 10401, "task_loss": 0.5511015057563782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37132224440574646, "epoch": 8.79, "learning_rate": 6.706114398422091e-06, "loss": 0.5835, "step": 10402, "task_loss": 0.47909629344940186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49961134791374207, "epoch": 8.79, "learning_rate": 6.701418239879778e-06, "loss": 0.5471, "step": 10403, "task_loss": 0.5872474312782288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5998954176902771, "epoch": 8.79, "learning_rate": 6.696722081337465e-06, "loss": 0.5165, "step": 10404, "task_loss": 0.6720244884490967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.587763249874115, "epoch": 8.79, "learning_rate": 6.692025922795154e-06, "loss": 0.6387, "step": 10405, "task_loss": 0.421166330575943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5295152068138123, "epoch": 8.8, "learning_rate": 6.687329764252842e-06, "loss": 0.4352, "step": 10406, "task_loss": 1.696271538734436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5235587358474731, "epoch": 8.8, "learning_rate": 6.682633605710529e-06, "loss": 0.4238, "step": 10407, "task_loss": 0.55076664686203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.257598876953125, "epoch": 8.8, "learning_rate": 6.677937447168217e-06, "loss": 0.4195, "step": 10408, "task_loss": 0.6880891919136047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44855010509490967, "epoch": 8.8, "learning_rate": 6.673241288625905e-06, "loss": 0.4365, "step": 10409, "task_loss": 0.17002137005329132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2819584608078003, "epoch": 8.8, "learning_rate": 6.668545130083592e-06, "loss": 0.3481, "step": 10410, "task_loss": 0.26800668239593506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.895827054977417, "epoch": 8.8, "learning_rate": 6.66384897154128e-06, "loss": 0.6469, "step": 10411, "task_loss": 1.0436720848083496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4269934892654419, "epoch": 8.8, "learning_rate": 6.659152812998967e-06, "loss": 0.6513, "step": 10412, "task_loss": 2.033895969390869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5557476282119751, "epoch": 8.8, "learning_rate": 6.654456654456655e-06, "loss": 0.5112, "step": 10413, "task_loss": 1.092881202697754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7778699994087219, "epoch": 8.8, "learning_rate": 6.649760495914342e-06, "loss": 0.5513, "step": 10414, "task_loss": 0.9790617823600769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7283186912536621, "epoch": 8.8, "learning_rate": 6.645064337372029e-06, "loss": 0.6826, "step": 10415, "task_loss": 1.1160176992416382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4141465425491333, "epoch": 8.8, "learning_rate": 6.6403681788297174e-06, "loss": 0.5258, "step": 10416, "task_loss": 0.08673986792564392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5557206869125366, "epoch": 8.81, "learning_rate": 6.635672020287405e-06, "loss": 0.5714, "step": 10417, "task_loss": 1.4278415441513062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8397420644760132, "epoch": 8.81, "learning_rate": 6.630975861745093e-06, "loss": 0.6956, "step": 10418, "task_loss": 1.2004828453063965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4105285406112671, "epoch": 8.81, "learning_rate": 6.62627970320278e-06, "loss": 0.551, "step": 10419, "task_loss": 0.46315857768058777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36757707595825195, "epoch": 8.81, "learning_rate": 6.621583544660467e-06, "loss": 0.5784, "step": 10420, "task_loss": 1.085241675376892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4397088885307312, "epoch": 8.81, "learning_rate": 6.616887386118156e-06, "loss": 0.4598, "step": 10421, "task_loss": 0.35520580410957336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.25036877393722534, "epoch": 8.81, "learning_rate": 6.612191227575844e-06, "loss": 0.4415, "step": 10422, "task_loss": 0.6575357913970947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32712024450302124, "epoch": 8.81, "learning_rate": 6.607495069033531e-06, "loss": 0.594, "step": 10423, "task_loss": 0.7929939031600952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9619885683059692, "epoch": 8.81, "learning_rate": 6.602798910491219e-06, "loss": 0.5635, "step": 10424, "task_loss": 0.5524195432662964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30500173568725586, "epoch": 8.81, "learning_rate": 6.598102751948906e-06, "loss": 0.419, "step": 10425, "task_loss": 0.45505908131599426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3952523469924927, "epoch": 8.81, "learning_rate": 6.5934065934065935e-06, "loss": 0.6328, "step": 10426, "task_loss": 0.2552952766418457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6165345907211304, "epoch": 8.81, "learning_rate": 6.5887104348642816e-06, "loss": 0.5939, "step": 10427, "task_loss": 1.2935916185379028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5129610300064087, "epoch": 8.81, "learning_rate": 6.584014276321969e-06, "loss": 0.5219, "step": 10428, "task_loss": 0.8096782565116882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35368865728378296, "epoch": 8.82, "learning_rate": 6.579318117779657e-06, "loss": 0.3794, "step": 10429, "task_loss": 0.8010287880897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7382724285125732, "epoch": 8.82, "learning_rate": 6.574621959237344e-06, "loss": 0.5513, "step": 10430, "task_loss": 0.7453286051750183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3201567530632019, "epoch": 8.82, "learning_rate": 6.569925800695031e-06, "loss": 0.4855, "step": 10431, "task_loss": 0.826776385307312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6114829778671265, "epoch": 8.82, "learning_rate": 6.565229642152719e-06, "loss": 0.7946, "step": 10432, "task_loss": 1.3054215908050537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.566211462020874, "epoch": 8.82, "learning_rate": 6.560533483610406e-06, "loss": 0.5639, "step": 10433, "task_loss": 0.40298745036125183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.493824303150177, "epoch": 8.82, "learning_rate": 6.555837325068094e-06, "loss": 0.3834, "step": 10434, "task_loss": 0.21850939095020294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3919332027435303, "epoch": 8.82, "learning_rate": 6.551141166525782e-06, "loss": 0.4842, "step": 10435, "task_loss": 0.7651007771492004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40241408348083496, "epoch": 8.82, "learning_rate": 6.5464450079834705e-06, "loss": 0.5695, "step": 10436, "task_loss": 0.5586229562759399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8152157664299011, "epoch": 8.82, "learning_rate": 6.541748849441158e-06, "loss": 0.6103, "step": 10437, "task_loss": 1.617213249206543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6254727840423584, "epoch": 8.82, "learning_rate": 6.537052690898846e-06, "loss": 0.6824, "step": 10438, "task_loss": 1.4289257526397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.767366886138916, "epoch": 8.82, "learning_rate": 6.532356532356533e-06, "loss": 0.556, "step": 10439, "task_loss": 0.5914978384971619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7573871612548828, "epoch": 8.82, "learning_rate": 6.527660373814221e-06, "loss": 0.5708, "step": 10440, "task_loss": 1.641072392463684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5479714274406433, "epoch": 8.83, "learning_rate": 6.522964215271908e-06, "loss": 0.6025, "step": 10441, "task_loss": 0.6346213817596436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5316697359085083, "epoch": 8.83, "learning_rate": 6.518268056729595e-06, "loss": 0.614, "step": 10442, "task_loss": 0.5819668769836426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6714796423912048, "epoch": 8.83, "learning_rate": 6.513571898187283e-06, "loss": 0.4386, "step": 10443, "task_loss": 1.7359087467193604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5932150483131409, "epoch": 8.83, "learning_rate": 6.5088757396449705e-06, "loss": 0.5182, "step": 10444, "task_loss": 0.7753129601478577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5334748029708862, "epoch": 8.83, "learning_rate": 6.5041795811026586e-06, "loss": 0.555, "step": 10445, "task_loss": 0.14095427095890045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.350587397813797, "epoch": 8.83, "learning_rate": 6.499483422560346e-06, "loss": 0.3673, "step": 10446, "task_loss": 0.38133975863456726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5152999758720398, "epoch": 8.83, "learning_rate": 6.494787264018033e-06, "loss": 0.6261, "step": 10447, "task_loss": 0.9487400650978088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5987451076507568, "epoch": 8.83, "learning_rate": 6.490091105475721e-06, "loss": 0.4817, "step": 10448, "task_loss": 0.29847651720046997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44979605078697205, "epoch": 8.83, "learning_rate": 6.485394946933408e-06, "loss": 0.631, "step": 10449, "task_loss": 0.8264268040657043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5390409231185913, "epoch": 8.83, "learning_rate": 6.480698788391096e-06, "loss": 0.6215, "step": 10450, "task_loss": 0.7545587420463562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7912903428077698, "epoch": 8.83, "learning_rate": 6.476002629848785e-06, "loss": 0.8091, "step": 10451, "task_loss": 1.229210376739502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4896339774131775, "epoch": 8.83, "learning_rate": 6.471306471306472e-06, "loss": 0.5331, "step": 10452, "task_loss": 0.16391853988170624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46610257029533386, "epoch": 8.84, "learning_rate": 6.4666103127641594e-06, "loss": 0.6082, "step": 10453, "task_loss": 0.40372413396835327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0308210849761963, "epoch": 8.84, "learning_rate": 6.4619141542218475e-06, "loss": 0.5858, "step": 10454, "task_loss": 0.8421250581741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0603078603744507, "epoch": 8.84, "learning_rate": 6.457217995679535e-06, "loss": 0.6641, "step": 10455, "task_loss": 0.8014900088310242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4588017761707306, "epoch": 8.84, "learning_rate": 6.452521837137223e-06, "loss": 0.4993, "step": 10456, "task_loss": 1.1446527242660522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.278428316116333, "epoch": 8.84, "learning_rate": 6.44782567859491e-06, "loss": 0.6165, "step": 10457, "task_loss": 0.8178578019142151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41203421354293823, "epoch": 8.84, "learning_rate": 6.443129520052597e-06, "loss": 0.4471, "step": 10458, "task_loss": 1.4501543045043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7486724853515625, "epoch": 8.84, "learning_rate": 6.438433361510285e-06, "loss": 0.6027, "step": 10459, "task_loss": 0.7174777984619141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7427054643630981, "epoch": 8.84, "learning_rate": 6.433737202967972e-06, "loss": 0.5834, "step": 10460, "task_loss": 1.1282144784927368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6552466154098511, "epoch": 8.84, "learning_rate": 6.42904104442566e-06, "loss": 0.5952, "step": 10461, "task_loss": 1.2273536920547485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4097461700439453, "epoch": 8.84, "learning_rate": 6.4243448858833475e-06, "loss": 0.644, "step": 10462, "task_loss": 0.9280551671981812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3772211968898773, "epoch": 8.84, "learning_rate": 6.419648727341035e-06, "loss": 0.5291, "step": 10463, "task_loss": 0.40155693888664246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26898396015167236, "epoch": 8.84, "learning_rate": 6.414952568798723e-06, "loss": 0.513, "step": 10464, "task_loss": 0.5565758943557739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5238035321235657, "epoch": 8.85, "learning_rate": 6.41025641025641e-06, "loss": 0.5632, "step": 10465, "task_loss": 0.9992777109146118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6214559078216553, "epoch": 8.85, "learning_rate": 6.405560251714097e-06, "loss": 0.6127, "step": 10466, "task_loss": 0.8110660910606384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2212274968624115, "epoch": 8.85, "learning_rate": 6.400864093171787e-06, "loss": 0.4966, "step": 10467, "task_loss": 1.0217211246490479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37593844532966614, "epoch": 8.85, "learning_rate": 6.396167934629474e-06, "loss": 0.5214, "step": 10468, "task_loss": 0.574214518070221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36339256167411804, "epoch": 8.85, "learning_rate": 6.391471776087161e-06, "loss": 0.5033, "step": 10469, "task_loss": 1.2062584161758423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5629768371582031, "epoch": 8.85, "learning_rate": 6.386775617544849e-06, "loss": 0.6438, "step": 10470, "task_loss": 0.9506111145019531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2501985430717468, "epoch": 8.85, "learning_rate": 6.382079459002536e-06, "loss": 0.478, "step": 10471, "task_loss": 0.5956177115440369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4601002037525177, "epoch": 8.85, "learning_rate": 6.3773833004602245e-06, "loss": 0.5569, "step": 10472, "task_loss": 0.9096618890762329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40452176332473755, "epoch": 8.85, "learning_rate": 6.372687141917912e-06, "loss": 0.6314, "step": 10473, "task_loss": 0.7085459232330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5138779282569885, "epoch": 8.85, "learning_rate": 6.367990983375599e-06, "loss": 0.444, "step": 10474, "task_loss": 0.40276849269866943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37669748067855835, "epoch": 8.85, "learning_rate": 6.363294824833287e-06, "loss": 0.4848, "step": 10475, "task_loss": 0.3527069091796875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48955416679382324, "epoch": 8.85, "learning_rate": 6.358598666290974e-06, "loss": 0.497, "step": 10476, "task_loss": 0.8091267347335815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6269183158874512, "epoch": 8.86, "learning_rate": 6.353902507748661e-06, "loss": 0.5522, "step": 10477, "task_loss": 0.6170360445976257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2688710689544678, "epoch": 8.86, "learning_rate": 6.349206349206349e-06, "loss": 0.5129, "step": 10478, "task_loss": 0.2568294405937195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6559799909591675, "epoch": 8.86, "learning_rate": 6.3445101906640365e-06, "loss": 0.5726, "step": 10479, "task_loss": 0.29970037937164307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3287980556488037, "epoch": 8.86, "learning_rate": 6.3398140321217245e-06, "loss": 0.4858, "step": 10480, "task_loss": 0.7465109825134277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6492245197296143, "epoch": 8.86, "learning_rate": 6.335117873579412e-06, "loss": 0.624, "step": 10481, "task_loss": 1.3502198457717896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5474361181259155, "epoch": 8.86, "learning_rate": 6.3304217150371006e-06, "loss": 0.728, "step": 10482, "task_loss": 1.2207472324371338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9202613234519958, "epoch": 8.86, "learning_rate": 6.325725556494788e-06, "loss": 0.5955, "step": 10483, "task_loss": 0.37648075819015503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4237348437309265, "epoch": 8.86, "learning_rate": 6.321029397952476e-06, "loss": 0.5443, "step": 10484, "task_loss": 0.28206148743629456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.559496283531189, "epoch": 8.86, "learning_rate": 6.316333239410163e-06, "loss": 0.6149, "step": 10485, "task_loss": 1.1405227184295654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.622838020324707, "epoch": 8.86, "learning_rate": 6.311637080867851e-06, "loss": 0.6017, "step": 10486, "task_loss": 0.6723348498344421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6068942546844482, "epoch": 8.86, "learning_rate": 6.306940922325538e-06, "loss": 0.5956, "step": 10487, "task_loss": 0.7783116102218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41178029775619507, "epoch": 8.87, "learning_rate": 6.302244763783225e-06, "loss": 0.595, "step": 10488, "task_loss": 1.0203125476837158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.461536705493927, "epoch": 8.87, "learning_rate": 6.297548605240913e-06, "loss": 0.4377, "step": 10489, "task_loss": 0.6743668913841248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5241439938545227, "epoch": 8.87, "learning_rate": 6.292852446698601e-06, "loss": 0.6807, "step": 10490, "task_loss": 0.4924022853374481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5792763829231262, "epoch": 8.87, "learning_rate": 6.288156288156289e-06, "loss": 0.5535, "step": 10491, "task_loss": 0.6193325519561768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41937699913978577, "epoch": 8.87, "learning_rate": 6.283460129613976e-06, "loss": 0.5513, "step": 10492, "task_loss": 0.8435229659080505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6229525804519653, "epoch": 8.87, "learning_rate": 6.278763971071663e-06, "loss": 0.5236, "step": 10493, "task_loss": 1.151229977607727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3961859345436096, "epoch": 8.87, "learning_rate": 6.274067812529351e-06, "loss": 0.4947, "step": 10494, "task_loss": 0.8277165293693542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0511362552642822, "epoch": 8.87, "learning_rate": 6.269371653987038e-06, "loss": 0.6132, "step": 10495, "task_loss": 1.2664514780044556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5868272185325623, "epoch": 8.87, "learning_rate": 6.264675495444726e-06, "loss": 0.5457, "step": 10496, "task_loss": 0.4452003538608551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46334409713745117, "epoch": 8.87, "learning_rate": 6.2599793369024134e-06, "loss": 0.5753, "step": 10497, "task_loss": 0.3896017372608185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.538925290107727, "epoch": 8.87, "learning_rate": 6.255283178360102e-06, "loss": 0.598, "step": 10498, "task_loss": 0.43592318892478943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6237809658050537, "epoch": 8.87, "learning_rate": 6.2505870198177895e-06, "loss": 0.5572, "step": 10499, "task_loss": 0.32883384823799133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5976702570915222, "epoch": 8.88, "learning_rate": 6.245890861275477e-06, "loss": 0.5262, "step": 10500, "task_loss": 1.1106112003326416 }, { "epoch": 8.88, "eval_accuracy": 0.9040792079207921, "eval_loss": 0.35537615418434143, "eval_runtime": 224.5565, "eval_samples_per_second": 112.444, "eval_steps_per_second": 0.882, "step": 10500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6420547962188721, "epoch": 8.88, "learning_rate": 6.241194702733164e-06, "loss": 0.6821, "step": 10501, "task_loss": 0.5643755197525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5313183069229126, "epoch": 8.88, "learning_rate": 6.236498544190853e-06, "loss": 0.6246, "step": 10502, "task_loss": 0.8020596504211426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5272547602653503, "epoch": 8.88, "learning_rate": 6.23180238564854e-06, "loss": 0.5936, "step": 10503, "task_loss": 0.6976853609085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6301801204681396, "epoch": 8.88, "learning_rate": 6.227106227106227e-06, "loss": 0.6151, "step": 10504, "task_loss": 1.0572049617767334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43667542934417725, "epoch": 8.88, "learning_rate": 6.222410068563915e-06, "loss": 0.4218, "step": 10505, "task_loss": 0.5438454747200012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3480386734008789, "epoch": 8.88, "learning_rate": 6.217713910021602e-06, "loss": 0.4149, "step": 10506, "task_loss": 0.14713677763938904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4233136773109436, "epoch": 8.88, "learning_rate": 6.21301775147929e-06, "loss": 0.5591, "step": 10507, "task_loss": 0.6959255337715149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7236480116844177, "epoch": 8.88, "learning_rate": 6.2083215929369776e-06, "loss": 0.4816, "step": 10508, "task_loss": 1.0164604187011719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5888813734054565, "epoch": 8.88, "learning_rate": 6.203625434394666e-06, "loss": 0.6136, "step": 10509, "task_loss": 0.5258911848068237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45381736755371094, "epoch": 8.88, "learning_rate": 6.198929275852354e-06, "loss": 0.5053, "step": 10510, "task_loss": 0.2339690625667572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5986266136169434, "epoch": 8.88, "learning_rate": 6.194233117310041e-06, "loss": 0.6281, "step": 10511, "task_loss": 0.9168148636817932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35262468457221985, "epoch": 8.89, "learning_rate": 6.189536958767728e-06, "loss": 0.5369, "step": 10512, "task_loss": 0.3806917071342468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4429953098297119, "epoch": 8.89, "learning_rate": 6.184840800225416e-06, "loss": 0.5621, "step": 10513, "task_loss": 1.1081626415252686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31609320640563965, "epoch": 8.89, "learning_rate": 6.180144641683103e-06, "loss": 0.3348, "step": 10514, "task_loss": 0.4959927797317505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.657959520816803, "epoch": 8.89, "learning_rate": 6.175448483140791e-06, "loss": 0.5272, "step": 10515, "task_loss": 1.439615249633789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0300650596618652, "epoch": 8.89, "learning_rate": 6.1707523245984785e-06, "loss": 0.7145, "step": 10516, "task_loss": 0.5324684977531433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5561068058013916, "epoch": 8.89, "learning_rate": 6.1660561660561665e-06, "loss": 0.5575, "step": 10517, "task_loss": 0.881191074848175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4788322150707245, "epoch": 8.89, "learning_rate": 6.1613600075138545e-06, "loss": 0.629, "step": 10518, "task_loss": 0.8983708024024963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45347365736961365, "epoch": 8.89, "learning_rate": 6.156663848971542e-06, "loss": 0.6194, "step": 10519, "task_loss": 0.1974245309829712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6997556686401367, "epoch": 8.89, "learning_rate": 6.151967690429229e-06, "loss": 0.5701, "step": 10520, "task_loss": 1.0081950426101685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42742469906806946, "epoch": 8.89, "learning_rate": 6.147271531886917e-06, "loss": 0.49, "step": 10521, "task_loss": 0.6097814440727234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1457337141036987, "epoch": 8.89, "learning_rate": 6.142575373344604e-06, "loss": 0.6124, "step": 10522, "task_loss": 1.4366159439086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4187476336956024, "epoch": 8.89, "learning_rate": 6.137879214802292e-06, "loss": 0.5228, "step": 10523, "task_loss": 0.04688310995697975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6375879049301147, "epoch": 8.9, "learning_rate": 6.133183056259979e-06, "loss": 0.5734, "step": 10524, "task_loss": 0.28477194905281067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.525679349899292, "epoch": 8.9, "learning_rate": 6.128486897717667e-06, "loss": 0.7008, "step": 10525, "task_loss": 0.25129270553588867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26762866973876953, "epoch": 8.9, "learning_rate": 6.123790739175355e-06, "loss": 0.4483, "step": 10526, "task_loss": 0.8145783543586731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2817729115486145, "epoch": 8.9, "learning_rate": 6.119094580633043e-06, "loss": 0.3841, "step": 10527, "task_loss": 0.290501207113266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3191314935684204, "epoch": 8.9, "learning_rate": 6.11439842209073e-06, "loss": 0.4529, "step": 10528, "task_loss": 0.42771467566490173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47695687413215637, "epoch": 8.9, "learning_rate": 6.109702263548418e-06, "loss": 0.582, "step": 10529, "task_loss": 0.8632078170776367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9590201377868652, "epoch": 8.9, "learning_rate": 6.105006105006105e-06, "loss": 0.6717, "step": 10530, "task_loss": 1.861912488937378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6550412178039551, "epoch": 8.9, "learning_rate": 6.100309946463793e-06, "loss": 0.5136, "step": 10531, "task_loss": 1.2308684587478638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3883151710033417, "epoch": 8.9, "learning_rate": 6.095613787921481e-06, "loss": 0.5142, "step": 10532, "task_loss": 1.4191991090774536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8721212148666382, "epoch": 8.9, "learning_rate": 6.090917629379168e-06, "loss": 0.7494, "step": 10533, "task_loss": 1.221500277519226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4936458468437195, "epoch": 8.9, "learning_rate": 6.086221470836856e-06, "loss": 0.5689, "step": 10534, "task_loss": 1.4692726135253906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6748039722442627, "epoch": 8.9, "learning_rate": 6.0815253122945435e-06, "loss": 0.5867, "step": 10535, "task_loss": 1.4874978065490723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7939556837081909, "epoch": 8.91, "learning_rate": 6.076829153752231e-06, "loss": 0.6013, "step": 10536, "task_loss": 1.005001425743103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33024731278419495, "epoch": 8.91, "learning_rate": 6.072132995209919e-06, "loss": 0.4334, "step": 10537, "task_loss": 0.6256821155548096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43595290184020996, "epoch": 8.91, "learning_rate": 6.067436836667606e-06, "loss": 0.5303, "step": 10538, "task_loss": 1.1345590353012085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43415567278862, "epoch": 8.91, "learning_rate": 6.062740678125294e-06, "loss": 0.605, "step": 10539, "task_loss": 1.0812879800796509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5417758226394653, "epoch": 8.91, "learning_rate": 6.058044519582982e-06, "loss": 0.4702, "step": 10540, "task_loss": 1.9333370923995972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.562230110168457, "epoch": 8.91, "learning_rate": 6.053348361040669e-06, "loss": 0.5916, "step": 10541, "task_loss": 0.6321846842765808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.52057284116745, "epoch": 8.91, "learning_rate": 6.048652202498357e-06, "loss": 0.451, "step": 10542, "task_loss": 0.5833584070205688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5641530752182007, "epoch": 8.91, "learning_rate": 6.043956043956044e-06, "loss": 0.5332, "step": 10543, "task_loss": 0.41914454102516174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49323657155036926, "epoch": 8.91, "learning_rate": 6.0392598854137315e-06, "loss": 0.6698, "step": 10544, "task_loss": 0.8137623071670532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7533406019210815, "epoch": 8.91, "learning_rate": 6.03456372687142e-06, "loss": 0.4673, "step": 10545, "task_loss": 1.453739881515503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.396594375371933, "epoch": 8.91, "learning_rate": 6.029867568329107e-06, "loss": 0.6242, "step": 10546, "task_loss": 0.4163069427013397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2598315477371216, "epoch": 8.91, "learning_rate": 6.025171409786794e-06, "loss": 0.4412, "step": 10547, "task_loss": 0.8116433620452881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5596101880073547, "epoch": 8.92, "learning_rate": 6.020475251244483e-06, "loss": 0.5172, "step": 10548, "task_loss": 1.5615427494049072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.213550329208374, "epoch": 8.92, "learning_rate": 6.01577909270217e-06, "loss": 0.6307, "step": 10549, "task_loss": 0.8106436133384705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6654082536697388, "epoch": 8.92, "learning_rate": 6.011082934159857e-06, "loss": 0.6163, "step": 10550, "task_loss": 0.9324057102203369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.658589243888855, "epoch": 8.92, "learning_rate": 6.006386775617545e-06, "loss": 0.5483, "step": 10551, "task_loss": 0.8512132167816162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5584046244621277, "epoch": 8.92, "learning_rate": 6.0016906170752324e-06, "loss": 0.444, "step": 10552, "task_loss": 0.4460335671901703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.529216468334198, "epoch": 8.92, "learning_rate": 5.9969944585329205e-06, "loss": 0.687, "step": 10553, "task_loss": 0.5076419711112976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5059454441070557, "epoch": 8.92, "learning_rate": 5.992298299990608e-06, "loss": 0.5504, "step": 10554, "task_loss": 0.5587508082389832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5142740607261658, "epoch": 8.92, "learning_rate": 5.987602141448296e-06, "loss": 0.5324, "step": 10555, "task_loss": 0.4163316488265991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4056118130683899, "epoch": 8.92, "learning_rate": 5.982905982905984e-06, "loss": 0.4852, "step": 10556, "task_loss": 1.110685110092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3843294382095337, "epoch": 8.92, "learning_rate": 5.978209824363671e-06, "loss": 0.6374, "step": 10557, "task_loss": 0.9433944225311279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3359653949737549, "epoch": 8.92, "learning_rate": 5.973513665821358e-06, "loss": 0.4077, "step": 10558, "task_loss": 0.8920928835868835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6987612247467041, "epoch": 8.93, "learning_rate": 5.968817507279046e-06, "loss": 0.807, "step": 10559, "task_loss": 0.6926020979881287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5514284372329712, "epoch": 8.93, "learning_rate": 5.964121348736733e-06, "loss": 0.5433, "step": 10560, "task_loss": 0.9957583546638489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0617092847824097, "epoch": 8.93, "learning_rate": 5.959425190194421e-06, "loss": 0.5783, "step": 10561, "task_loss": 1.000017523765564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3169888257980347, "epoch": 8.93, "learning_rate": 5.9547290316521085e-06, "loss": 0.8277, "step": 10562, "task_loss": 1.1960927248001099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3309318423271179, "epoch": 8.93, "learning_rate": 5.9500328731097966e-06, "loss": 0.4471, "step": 10563, "task_loss": 0.5644984841346741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44868773221969604, "epoch": 8.93, "learning_rate": 5.945336714567485e-06, "loss": 0.3971, "step": 10564, "task_loss": 1.1958519220352173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8737192153930664, "epoch": 8.93, "learning_rate": 5.940640556025172e-06, "loss": 0.5708, "step": 10565, "task_loss": 0.8242831826210022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7315505146980286, "epoch": 8.93, "learning_rate": 5.935944397482859e-06, "loss": 0.7394, "step": 10566, "task_loss": 0.9019780158996582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5823584794998169, "epoch": 8.93, "learning_rate": 5.931248238940547e-06, "loss": 0.5788, "step": 10567, "task_loss": 0.18012768030166626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9140540957450867, "epoch": 8.93, "learning_rate": 5.926552080398234e-06, "loss": 0.4923, "step": 10568, "task_loss": 0.3501703143119812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8487803936004639, "epoch": 8.93, "learning_rate": 5.921855921855922e-06, "loss": 0.5835, "step": 10569, "task_loss": 0.6849976778030396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6200735569000244, "epoch": 8.93, "learning_rate": 5.917159763313609e-06, "loss": 0.5819, "step": 10570, "task_loss": 0.2966783046722412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5024434924125671, "epoch": 8.94, "learning_rate": 5.9124636047712974e-06, "loss": 0.6076, "step": 10571, "task_loss": 1.3597824573516846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5305497050285339, "epoch": 8.94, "learning_rate": 5.9077674462289855e-06, "loss": 0.4897, "step": 10572, "task_loss": 0.4419254660606384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5478636026382446, "epoch": 8.94, "learning_rate": 5.903071287686673e-06, "loss": 0.6216, "step": 10573, "task_loss": 0.9752206802368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2660437226295471, "epoch": 8.94, "learning_rate": 5.89837512914436e-06, "loss": 0.473, "step": 10574, "task_loss": 0.06477607041597366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7558462619781494, "epoch": 8.94, "learning_rate": 5.893678970602048e-06, "loss": 0.4327, "step": 10575, "task_loss": 0.2623491883277893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4793110191822052, "epoch": 8.94, "learning_rate": 5.888982812059735e-06, "loss": 0.5929, "step": 10576, "task_loss": 0.28637102246284485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4196704030036926, "epoch": 8.94, "learning_rate": 5.884286653517423e-06, "loss": 0.5812, "step": 10577, "task_loss": 0.35692161321640015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4795711934566498, "epoch": 8.94, "learning_rate": 5.87959049497511e-06, "loss": 0.7459, "step": 10578, "task_loss": 0.8184309005737305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4405076205730438, "epoch": 8.94, "learning_rate": 5.874894336432798e-06, "loss": 0.5703, "step": 10579, "task_loss": 0.7759891748428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.320750892162323, "epoch": 8.94, "learning_rate": 5.870198177890486e-06, "loss": 0.4676, "step": 10580, "task_loss": 0.6790202856063843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6451736688613892, "epoch": 8.94, "learning_rate": 5.8655020193481735e-06, "loss": 0.6585, "step": 10581, "task_loss": 0.586154043674469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3461170494556427, "epoch": 8.94, "learning_rate": 5.860805860805861e-06, "loss": 0.4935, "step": 10582, "task_loss": 0.33428093791007996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4098072052001953, "epoch": 8.95, "learning_rate": 5.856109702263549e-06, "loss": 0.5466, "step": 10583, "task_loss": 0.059801969677209854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37971851229667664, "epoch": 8.95, "learning_rate": 5.851413543721236e-06, "loss": 0.515, "step": 10584, "task_loss": 0.11595194786787033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5213980674743652, "epoch": 8.95, "learning_rate": 5.846717385178924e-06, "loss": 0.5571, "step": 10585, "task_loss": 0.2752029597759247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6032611131668091, "epoch": 8.95, "learning_rate": 5.842021226636612e-06, "loss": 0.562, "step": 10586, "task_loss": 0.9509679079055786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4550812542438507, "epoch": 8.95, "learning_rate": 5.837325068094299e-06, "loss": 0.6003, "step": 10587, "task_loss": 1.2311104536056519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6174445748329163, "epoch": 8.95, "learning_rate": 5.832628909551987e-06, "loss": 0.4031, "step": 10588, "task_loss": 0.8585431575775146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5638719797134399, "epoch": 8.95, "learning_rate": 5.8279327510096744e-06, "loss": 0.5222, "step": 10589, "task_loss": 0.8829272985458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6215947270393372, "epoch": 8.95, "learning_rate": 5.823236592467362e-06, "loss": 0.5721, "step": 10590, "task_loss": 0.3600747585296631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27730077505111694, "epoch": 8.95, "learning_rate": 5.81854043392505e-06, "loss": 0.3983, "step": 10591, "task_loss": 0.4132067859172821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5699563026428223, "epoch": 8.95, "learning_rate": 5.813844275382737e-06, "loss": 0.6227, "step": 10592, "task_loss": 1.1953743696212769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35000061988830566, "epoch": 8.95, "learning_rate": 5.809148116840425e-06, "loss": 0.3978, "step": 10593, "task_loss": 0.30782368779182434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.3654136657714844, "epoch": 8.95, "learning_rate": 5.804451958298113e-06, "loss": 0.7022, "step": 10594, "task_loss": 1.6862043142318726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8862373232841492, "epoch": 8.96, "learning_rate": 5.7997557997558e-06, "loss": 0.6238, "step": 10595, "task_loss": 1.6083306074142456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31328678131103516, "epoch": 8.96, "learning_rate": 5.795059641213488e-06, "loss": 0.4431, "step": 10596, "task_loss": 0.5814842581748962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31923583149909973, "epoch": 8.96, "learning_rate": 5.790363482671175e-06, "loss": 0.505, "step": 10597, "task_loss": 0.7539598345756531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37936264276504517, "epoch": 8.96, "learning_rate": 5.7856673241288625e-06, "loss": 0.4382, "step": 10598, "task_loss": 0.3608052730560303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2500288784503937, "epoch": 8.96, "learning_rate": 5.7809711655865505e-06, "loss": 0.4233, "step": 10599, "task_loss": 0.47196364402770996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46518731117248535, "epoch": 8.96, "learning_rate": 5.776275007044238e-06, "loss": 0.5318, "step": 10600, "task_loss": 0.5062164664268494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2863707542419434, "epoch": 8.96, "learning_rate": 5.771578848501926e-06, "loss": 0.7961, "step": 10601, "task_loss": 0.9925091862678528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7395411133766174, "epoch": 8.96, "learning_rate": 5.766882689959614e-06, "loss": 0.5854, "step": 10602, "task_loss": 1.0946437120437622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7544252276420593, "epoch": 8.96, "learning_rate": 5.762186531417301e-06, "loss": 0.6138, "step": 10603, "task_loss": 0.9084381461143494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4712444543838501, "epoch": 8.96, "learning_rate": 5.757490372874989e-06, "loss": 0.5779, "step": 10604, "task_loss": 0.5418534874916077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5804394483566284, "epoch": 8.96, "learning_rate": 5.752794214332676e-06, "loss": 0.4885, "step": 10605, "task_loss": 1.3616821765899658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6439148187637329, "epoch": 8.96, "learning_rate": 5.748098055790363e-06, "loss": 0.6849, "step": 10606, "task_loss": 0.952193021774292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35849446058273315, "epoch": 8.97, "learning_rate": 5.743401897248051e-06, "loss": 0.5724, "step": 10607, "task_loss": 0.3658808469772339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6236612796783447, "epoch": 8.97, "learning_rate": 5.738705738705739e-06, "loss": 0.6065, "step": 10608, "task_loss": 0.7249253988265991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.1865118145942688, "epoch": 8.97, "learning_rate": 5.734009580163427e-06, "loss": 0.4168, "step": 10609, "task_loss": 0.4059983193874359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5416386127471924, "epoch": 8.97, "learning_rate": 5.729313421621115e-06, "loss": 0.6491, "step": 10610, "task_loss": 0.08619317412376404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4127536714076996, "epoch": 8.97, "learning_rate": 5.724617263078802e-06, "loss": 0.4889, "step": 10611, "task_loss": 1.3284363746643066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6580613255500793, "epoch": 8.97, "learning_rate": 5.71992110453649e-06, "loss": 0.5312, "step": 10612, "task_loss": 0.7715619802474976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5236706137657166, "epoch": 8.97, "learning_rate": 5.715224945994177e-06, "loss": 0.5261, "step": 10613, "task_loss": 0.908225417137146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4879898428916931, "epoch": 8.97, "learning_rate": 5.710528787451864e-06, "loss": 0.4933, "step": 10614, "task_loss": 0.4714900851249695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5864462852478027, "epoch": 8.97, "learning_rate": 5.705832628909552e-06, "loss": 0.508, "step": 10615, "task_loss": 0.7189812660217285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8074636459350586, "epoch": 8.97, "learning_rate": 5.7011364703672395e-06, "loss": 0.7787, "step": 10616, "task_loss": 0.8123316168785095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6987520456314087, "epoch": 8.97, "learning_rate": 5.6964403118249275e-06, "loss": 0.6735, "step": 10617, "task_loss": 0.5292261838912964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6355494260787964, "epoch": 8.97, "learning_rate": 5.6917441532826155e-06, "loss": 0.4987, "step": 10618, "task_loss": 0.9870746731758118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8259447813034058, "epoch": 8.98, "learning_rate": 5.687047994740303e-06, "loss": 0.6188, "step": 10619, "task_loss": 0.4100607633590698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5482206344604492, "epoch": 8.98, "learning_rate": 5.682351836197991e-06, "loss": 0.485, "step": 10620, "task_loss": 0.23007835447788239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44996970891952515, "epoch": 8.98, "learning_rate": 5.677655677655678e-06, "loss": 0.656, "step": 10621, "task_loss": 0.5835139751434326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7870843410491943, "epoch": 8.98, "learning_rate": 5.672959519113365e-06, "loss": 0.5842, "step": 10622, "task_loss": 0.8866196274757385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4002609848976135, "epoch": 8.98, "learning_rate": 5.668263360571053e-06, "loss": 0.5327, "step": 10623, "task_loss": 0.11959858238697052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5889905095100403, "epoch": 8.98, "learning_rate": 5.66356720202874e-06, "loss": 0.6166, "step": 10624, "task_loss": 0.898025393486023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32101938128471375, "epoch": 8.98, "learning_rate": 5.658871043486428e-06, "loss": 0.4533, "step": 10625, "task_loss": 0.47380331158638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3777613043785095, "epoch": 8.98, "learning_rate": 5.6541748849441164e-06, "loss": 0.3863, "step": 10626, "task_loss": 0.3064054846763611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5514116883277893, "epoch": 8.98, "learning_rate": 5.649478726401804e-06, "loss": 0.4836, "step": 10627, "task_loss": 0.34018826484680176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4399358630180359, "epoch": 8.98, "learning_rate": 5.644782567859491e-06, "loss": 0.5457, "step": 10628, "task_loss": 0.6247934103012085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33214887976646423, "epoch": 8.98, "learning_rate": 5.640086409317179e-06, "loss": 0.4334, "step": 10629, "task_loss": 0.35234057903289795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34852495789527893, "epoch": 8.99, "learning_rate": 5.635390250774866e-06, "loss": 0.4465, "step": 10630, "task_loss": 0.9072916507720947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5743696093559265, "epoch": 8.99, "learning_rate": 5.630694092232554e-06, "loss": 0.5103, "step": 10631, "task_loss": 0.41072437167167664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5917646884918213, "epoch": 8.99, "learning_rate": 5.625997933690241e-06, "loss": 0.6469, "step": 10632, "task_loss": 0.5651862621307373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48953795433044434, "epoch": 8.99, "learning_rate": 5.621301775147929e-06, "loss": 0.4699, "step": 10633, "task_loss": 0.7310975790023804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41004741191864014, "epoch": 8.99, "learning_rate": 5.616605616605617e-06, "loss": 0.474, "step": 10634, "task_loss": 0.5718944668769836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4913330376148224, "epoch": 8.99, "learning_rate": 5.6119094580633045e-06, "loss": 0.562, "step": 10635, "task_loss": 0.42646050453186035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5983633995056152, "epoch": 8.99, "learning_rate": 5.607213299520992e-06, "loss": 0.5823, "step": 10636, "task_loss": 0.2602546811103821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5408806204795837, "epoch": 8.99, "learning_rate": 5.60251714097868e-06, "loss": 0.4615, "step": 10637, "task_loss": 0.367552787065506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5105755925178528, "epoch": 8.99, "learning_rate": 5.597820982436367e-06, "loss": 0.5011, "step": 10638, "task_loss": 1.3216745853424072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6889569163322449, "epoch": 8.99, "learning_rate": 5.593124823894055e-06, "loss": 0.4891, "step": 10639, "task_loss": 1.7083390951156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5840407609939575, "epoch": 8.99, "learning_rate": 5.588428665351743e-06, "loss": 0.5212, "step": 10640, "task_loss": 0.7892876267433167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5186394453048706, "epoch": 8.99, "learning_rate": 5.58373250680943e-06, "loss": 0.4822, "step": 10641, "task_loss": 1.599323034286499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3880797028541565, "epoch": 9.0, "learning_rate": 5.579036348267118e-06, "loss": 0.5901, "step": 10642, "task_loss": 0.13101691007614136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47783488035202026, "epoch": 9.0, "learning_rate": 5.574340189724805e-06, "loss": 0.5412, "step": 10643, "task_loss": 0.36147913336753845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49673253297805786, "epoch": 9.0, "learning_rate": 5.5696440311824926e-06, "loss": 0.5129, "step": 10644, "task_loss": 0.7412142753601074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7691666483879089, "epoch": 9.0, "learning_rate": 5.564947872640181e-06, "loss": 0.4995, "step": 10645, "task_loss": 0.8824101090431213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.484579861164093, "epoch": 9.0, "learning_rate": 5.560251714097868e-06, "loss": 0.5419, "step": 10646, "task_loss": 0.4574880599975586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44032591581344604, "epoch": 9.0, "learning_rate": 5.555555555555556e-06, "loss": 0.487, "step": 10647, "task_loss": 0.7041007280349731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40121930837631226, "epoch": 9.0, "learning_rate": 5.550859397013244e-06, "loss": 0.8202, "step": 10648, "task_loss": 0.30082905292510986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5182110667228699, "epoch": 9.0, "learning_rate": 5.546163238470931e-06, "loss": 0.4821, "step": 10649, "task_loss": 0.5310503840446472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7338597774505615, "epoch": 9.0, "learning_rate": 5.541467079928619e-06, "loss": 0.575, "step": 10650, "task_loss": 0.8483703136444092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40683722496032715, "epoch": 9.0, "learning_rate": 5.536770921386306e-06, "loss": 0.4863, "step": 10651, "task_loss": 0.7259117960929871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3533979654312134, "epoch": 9.0, "learning_rate": 5.5320747628439934e-06, "loss": 0.4222, "step": 10652, "task_loss": 0.18067091703414917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3635929822921753, "epoch": 9.01, "learning_rate": 5.5273786043016815e-06, "loss": 0.4233, "step": 10653, "task_loss": 0.044916242361068726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42419013381004333, "epoch": 9.01, "learning_rate": 5.522682445759369e-06, "loss": 0.5879, "step": 10654, "task_loss": 0.4898815453052521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33666494488716125, "epoch": 9.01, "learning_rate": 5.517986287217057e-06, "loss": 0.4073, "step": 10655, "task_loss": 0.0614178292453289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26912200450897217, "epoch": 9.01, "learning_rate": 5.513290128674745e-06, "loss": 0.5839, "step": 10656, "task_loss": 0.21425390243530273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8947529196739197, "epoch": 9.01, "learning_rate": 5.508593970132432e-06, "loss": 0.7308, "step": 10657, "task_loss": 1.0954711437225342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44005873799324036, "epoch": 9.01, "learning_rate": 5.50389781159012e-06, "loss": 0.3952, "step": 10658, "task_loss": 0.6965355277061462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8052119612693787, "epoch": 9.01, "learning_rate": 5.499201653047807e-06, "loss": 0.6889, "step": 10659, "task_loss": 1.1232020854949951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6039445996284485, "epoch": 9.01, "learning_rate": 5.494505494505494e-06, "loss": 0.4514, "step": 10660, "task_loss": 1.07892906665802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6589264273643494, "epoch": 9.01, "learning_rate": 5.489809335963182e-06, "loss": 0.4742, "step": 10661, "task_loss": 1.4069225788116455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5168144106864929, "epoch": 9.01, "learning_rate": 5.4851131774208696e-06, "loss": 0.584, "step": 10662, "task_loss": 0.9509081244468689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6360461115837097, "epoch": 9.01, "learning_rate": 5.480417018878558e-06, "loss": 0.5743, "step": 10663, "task_loss": 1.1335527896881104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3195936977863312, "epoch": 9.01, "learning_rate": 5.475720860336246e-06, "loss": 0.6223, "step": 10664, "task_loss": 0.061238572001457214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39680543541908264, "epoch": 9.02, "learning_rate": 5.471024701793933e-06, "loss": 0.4963, "step": 10665, "task_loss": 1.3738739490509033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5354663133621216, "epoch": 9.02, "learning_rate": 5.466328543251621e-06, "loss": 0.4588, "step": 10666, "task_loss": 0.39986732602119446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5939179062843323, "epoch": 9.02, "learning_rate": 5.461632384709308e-06, "loss": 0.5114, "step": 10667, "task_loss": 0.8660687804222107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6671440005302429, "epoch": 9.02, "learning_rate": 5.456936226166995e-06, "loss": 0.5394, "step": 10668, "task_loss": 0.7148017883300781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37127476930618286, "epoch": 9.02, "learning_rate": 5.452240067624683e-06, "loss": 0.4626, "step": 10669, "task_loss": 0.6323646903038025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6180217266082764, "epoch": 9.02, "learning_rate": 5.4475439090823704e-06, "loss": 0.4462, "step": 10670, "task_loss": 0.4112711548805237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4207005500793457, "epoch": 9.02, "learning_rate": 5.4428477505400585e-06, "loss": 0.5839, "step": 10671, "task_loss": 0.5924689173698425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6923037767410278, "epoch": 9.02, "learning_rate": 5.4381515919977465e-06, "loss": 0.4904, "step": 10672, "task_loss": 1.2507325410842896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3350827395915985, "epoch": 9.02, "learning_rate": 5.433455433455434e-06, "loss": 0.5769, "step": 10673, "task_loss": 0.3332979381084442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46088075637817383, "epoch": 9.02, "learning_rate": 5.428759274913122e-06, "loss": 0.4961, "step": 10674, "task_loss": 0.8437260985374451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4155154526233673, "epoch": 9.02, "learning_rate": 5.424063116370809e-06, "loss": 0.5081, "step": 10675, "task_loss": 0.24063348770141602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4507129490375519, "epoch": 9.02, "learning_rate": 5.419366957828496e-06, "loss": 0.3753, "step": 10676, "task_loss": 0.4883268475532532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3387559652328491, "epoch": 9.03, "learning_rate": 5.414670799286184e-06, "loss": 0.4246, "step": 10677, "task_loss": 0.0797370970249176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9072496294975281, "epoch": 9.03, "learning_rate": 5.409974640743871e-06, "loss": 0.6749, "step": 10678, "task_loss": 0.7291640043258667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5448293089866638, "epoch": 9.03, "learning_rate": 5.405278482201559e-06, "loss": 0.4625, "step": 10679, "task_loss": 0.6079757809638977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48609963059425354, "epoch": 9.03, "learning_rate": 5.400582323659247e-06, "loss": 0.5168, "step": 10680, "task_loss": 0.3549111783504486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.326621949672699, "epoch": 9.03, "learning_rate": 5.3958861651169346e-06, "loss": 0.5623, "step": 10681, "task_loss": 0.9568971395492554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6141725182533264, "epoch": 9.03, "learning_rate": 5.391190006574623e-06, "loss": 0.5879, "step": 10682, "task_loss": 1.0426660776138306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5327695608139038, "epoch": 9.03, "learning_rate": 5.38649384803231e-06, "loss": 0.4914, "step": 10683, "task_loss": 0.1876465529203415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5823583602905273, "epoch": 9.03, "learning_rate": 5.381797689489997e-06, "loss": 0.5155, "step": 10684, "task_loss": 0.6362265944480896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3821605443954468, "epoch": 9.03, "learning_rate": 5.377101530947685e-06, "loss": 0.4006, "step": 10685, "task_loss": 0.7283532619476318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5649060010910034, "epoch": 9.03, "learning_rate": 5.372405372405373e-06, "loss": 0.5141, "step": 10686, "task_loss": 1.1974315643310547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4989258646965027, "epoch": 9.03, "learning_rate": 5.36770921386306e-06, "loss": 0.6503, "step": 10687, "task_loss": 0.8167094588279724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5260305404663086, "epoch": 9.03, "learning_rate": 5.363013055320748e-06, "loss": 0.5945, "step": 10688, "task_loss": 0.9579104781150818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44966042041778564, "epoch": 9.04, "learning_rate": 5.3583168967784355e-06, "loss": 0.4611, "step": 10689, "task_loss": 0.38625356554985046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3999742567539215, "epoch": 9.04, "learning_rate": 5.3536207382361235e-06, "loss": 0.5365, "step": 10690, "task_loss": 0.8518800139427185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7802377939224243, "epoch": 9.04, "learning_rate": 5.348924579693811e-06, "loss": 0.622, "step": 10691, "task_loss": 1.6303719282150269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6764510273933411, "epoch": 9.04, "learning_rate": 5.344228421151498e-06, "loss": 0.5123, "step": 10692, "task_loss": 0.9120407104492188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36846059560775757, "epoch": 9.04, "learning_rate": 5.339532262609186e-06, "loss": 0.4616, "step": 10693, "task_loss": 1.001678466796875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4473530948162079, "epoch": 9.04, "learning_rate": 5.334836104066874e-06, "loss": 0.5429, "step": 10694, "task_loss": 0.8862698674201965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3290940821170807, "epoch": 9.04, "learning_rate": 5.330139945524561e-06, "loss": 0.4525, "step": 10695, "task_loss": 0.9660460352897644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33058735728263855, "epoch": 9.04, "learning_rate": 5.325443786982249e-06, "loss": 0.4323, "step": 10696, "task_loss": 0.6270085573196411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2998146414756775, "epoch": 9.04, "learning_rate": 5.320747628439936e-06, "loss": 0.4887, "step": 10697, "task_loss": 0.299557089805603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5215908288955688, "epoch": 9.04, "learning_rate": 5.316051469897624e-06, "loss": 0.6213, "step": 10698, "task_loss": 0.7067731618881226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.412331759929657, "epoch": 9.04, "learning_rate": 5.3113553113553116e-06, "loss": 0.3546, "step": 10699, "task_loss": 0.4179603159427643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7369719743728638, "epoch": 9.04, "learning_rate": 5.306659152812999e-06, "loss": 0.5316, "step": 10700, "task_loss": 0.40015709400177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.1557701826095581, "epoch": 9.05, "learning_rate": 5.301962994270687e-06, "loss": 0.4535, "step": 10701, "task_loss": 0.004909770097583532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7552615404129028, "epoch": 9.05, "learning_rate": 5.297266835728375e-06, "loss": 0.7499, "step": 10702, "task_loss": 0.3595063090324402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.757243812084198, "epoch": 9.05, "learning_rate": 5.292570677186062e-06, "loss": 0.4878, "step": 10703, "task_loss": 0.4612920880317688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7450886964797974, "epoch": 9.05, "learning_rate": 5.28787451864375e-06, "loss": 0.5907, "step": 10704, "task_loss": 0.9325494766235352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38350632786750793, "epoch": 9.05, "learning_rate": 5.283178360101437e-06, "loss": 0.4055, "step": 10705, "task_loss": 0.6759563684463501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6079666018486023, "epoch": 9.05, "learning_rate": 5.278482201559124e-06, "loss": 0.8378, "step": 10706, "task_loss": 0.413409024477005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6092326045036316, "epoch": 9.05, "learning_rate": 5.2737860430168124e-06, "loss": 0.6744, "step": 10707, "task_loss": 1.2089263200759888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3733971118927002, "epoch": 9.05, "learning_rate": 5.2690898844745e-06, "loss": 0.5246, "step": 10708, "task_loss": 1.0511474609375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4286840260028839, "epoch": 9.05, "learning_rate": 5.264393725932188e-06, "loss": 0.4138, "step": 10709, "task_loss": 0.17371788620948792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6021478772163391, "epoch": 9.05, "learning_rate": 5.259697567389876e-06, "loss": 0.58, "step": 10710, "task_loss": 0.5650947093963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6029753088951111, "epoch": 9.05, "learning_rate": 5.255001408847563e-06, "loss": 0.5393, "step": 10711, "task_loss": 0.5631636381149292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.546440064907074, "epoch": 9.05, "learning_rate": 5.250305250305251e-06, "loss": 0.5756, "step": 10712, "task_loss": 0.6061796545982361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33398181200027466, "epoch": 9.06, "learning_rate": 5.245609091762938e-06, "loss": 0.5302, "step": 10713, "task_loss": 0.33227360248565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43928253650665283, "epoch": 9.06, "learning_rate": 5.240912933220625e-06, "loss": 0.5429, "step": 10714, "task_loss": 0.9597697854042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7576483488082886, "epoch": 9.06, "learning_rate": 5.236216774678313e-06, "loss": 0.5815, "step": 10715, "task_loss": 1.7227888107299805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3055664598941803, "epoch": 9.06, "learning_rate": 5.2315206161360005e-06, "loss": 0.5738, "step": 10716, "task_loss": 0.5837642550468445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4474610984325409, "epoch": 9.06, "learning_rate": 5.2268244575936885e-06, "loss": 0.5794, "step": 10717, "task_loss": 0.8527469635009766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4749275743961334, "epoch": 9.06, "learning_rate": 5.2221282990513766e-06, "loss": 0.5507, "step": 10718, "task_loss": 0.4687459170818329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6210784912109375, "epoch": 9.06, "learning_rate": 5.217432140509064e-06, "loss": 0.7529, "step": 10719, "task_loss": 0.6175867915153503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47150373458862305, "epoch": 9.06, "learning_rate": 5.212735981966752e-06, "loss": 0.3962, "step": 10720, "task_loss": 0.5332894325256348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3841581642627716, "epoch": 9.06, "learning_rate": 5.208039823424439e-06, "loss": 0.6005, "step": 10721, "task_loss": 0.5226436257362366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27491259574890137, "epoch": 9.06, "learning_rate": 5.203343664882126e-06, "loss": 0.3967, "step": 10722, "task_loss": 0.16011053323745728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3453192114830017, "epoch": 9.06, "learning_rate": 5.198647506339814e-06, "loss": 0.4511, "step": 10723, "task_loss": 0.20817817747592926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2241399735212326, "epoch": 9.07, "learning_rate": 5.193951347797501e-06, "loss": 0.5786, "step": 10724, "task_loss": 0.08580203354358673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37518811225891113, "epoch": 9.07, "learning_rate": 5.189255189255189e-06, "loss": 0.5489, "step": 10725, "task_loss": 0.9821268916130066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3402150273323059, "epoch": 9.07, "learning_rate": 5.1845590307128775e-06, "loss": 0.4744, "step": 10726, "task_loss": 0.6487136483192444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6778649091720581, "epoch": 9.07, "learning_rate": 5.179862872170565e-06, "loss": 0.7196, "step": 10727, "task_loss": 0.9036701321601868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46070533990859985, "epoch": 9.07, "learning_rate": 5.175166713628253e-06, "loss": 0.5339, "step": 10728, "task_loss": 0.45523953437805176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7271194458007812, "epoch": 9.07, "learning_rate": 5.17047055508594e-06, "loss": 0.6503, "step": 10729, "task_loss": 0.7414557337760925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5402107834815979, "epoch": 9.07, "learning_rate": 5.165774396543627e-06, "loss": 0.5069, "step": 10730, "task_loss": 1.215453028678894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5083130598068237, "epoch": 9.07, "learning_rate": 5.161078238001315e-06, "loss": 0.4595, "step": 10731, "task_loss": 0.6019947528839111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26350435614585876, "epoch": 9.07, "learning_rate": 5.156382079459002e-06, "loss": 0.462, "step": 10732, "task_loss": 0.24215155839920044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6530492305755615, "epoch": 9.07, "learning_rate": 5.15168592091669e-06, "loss": 0.6092, "step": 10733, "task_loss": 1.1276872158050537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6274473667144775, "epoch": 9.07, "learning_rate": 5.146989762374378e-06, "loss": 0.6565, "step": 10734, "task_loss": 0.6351217031478882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.14521172642707825, "epoch": 9.07, "learning_rate": 5.1422936038320655e-06, "loss": 0.363, "step": 10735, "task_loss": 0.009979610331356525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6746052503585815, "epoch": 9.08, "learning_rate": 5.1375974452897536e-06, "loss": 0.5717, "step": 10736, "task_loss": 0.6837327480316162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4172007739543915, "epoch": 9.08, "learning_rate": 5.132901286747441e-06, "loss": 0.5572, "step": 10737, "task_loss": 0.9956902265548706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4760212302207947, "epoch": 9.08, "learning_rate": 5.128205128205128e-06, "loss": 0.4695, "step": 10738, "task_loss": 1.6575639247894287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7258514165878296, "epoch": 9.08, "learning_rate": 5.123508969662816e-06, "loss": 0.7002, "step": 10739, "task_loss": 0.7292040586471558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41243189573287964, "epoch": 9.08, "learning_rate": 5.118812811120504e-06, "loss": 0.4351, "step": 10740, "task_loss": 0.4593050479888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6958507895469666, "epoch": 9.08, "learning_rate": 5.114116652578191e-06, "loss": 0.6753, "step": 10741, "task_loss": 0.9971254467964172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3604472279548645, "epoch": 9.08, "learning_rate": 5.109420494035879e-06, "loss": 0.4339, "step": 10742, "task_loss": 0.7116667032241821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48808205127716064, "epoch": 9.08, "learning_rate": 5.104724335493566e-06, "loss": 0.621, "step": 10743, "task_loss": 0.549851655960083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6175312995910645, "epoch": 9.08, "learning_rate": 5.1000281769512544e-06, "loss": 0.4795, "step": 10744, "task_loss": 0.39771026372909546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6876251697540283, "epoch": 9.08, "learning_rate": 5.095332018408942e-06, "loss": 0.5378, "step": 10745, "task_loss": 0.7095063924789429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3493800759315491, "epoch": 9.08, "learning_rate": 5.090635859866629e-06, "loss": 0.4239, "step": 10746, "task_loss": 0.5308291912078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5980483889579773, "epoch": 9.08, "learning_rate": 5.085939701324317e-06, "loss": 0.4596, "step": 10747, "task_loss": 1.4018864631652832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5578188896179199, "epoch": 9.09, "learning_rate": 5.081243542782005e-06, "loss": 0.6085, "step": 10748, "task_loss": 0.32747682929039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5482219457626343, "epoch": 9.09, "learning_rate": 5.076547384239692e-06, "loss": 0.554, "step": 10749, "task_loss": 0.8953731656074524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.417740136384964, "epoch": 9.09, "learning_rate": 5.07185122569738e-06, "loss": 0.4474, "step": 10750, "task_loss": 0.2875361740589142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31438520550727844, "epoch": 9.09, "learning_rate": 5.067155067155067e-06, "loss": 0.5223, "step": 10751, "task_loss": 0.5189372301101685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6024208068847656, "epoch": 9.09, "learning_rate": 5.062458908612755e-06, "loss": 0.5179, "step": 10752, "task_loss": 0.4638032019138336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5708364248275757, "epoch": 9.09, "learning_rate": 5.0577627500704425e-06, "loss": 0.4827, "step": 10753, "task_loss": 0.1948530077934265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6907607316970825, "epoch": 9.09, "learning_rate": 5.05306659152813e-06, "loss": 0.5072, "step": 10754, "task_loss": 1.1343395709991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20887979865074158, "epoch": 9.09, "learning_rate": 5.048370432985818e-06, "loss": 0.4373, "step": 10755, "task_loss": 0.29088878631591797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6817260384559631, "epoch": 9.09, "learning_rate": 5.043674274443506e-06, "loss": 0.6037, "step": 10756, "task_loss": 0.5630546808242798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31612634658813477, "epoch": 9.09, "learning_rate": 5.038978115901193e-06, "loss": 0.3722, "step": 10757, "task_loss": 0.3872344195842743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5615794658660889, "epoch": 9.09, "learning_rate": 5.034281957358881e-06, "loss": 0.5742, "step": 10758, "task_loss": 0.9556365013122559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32460489869117737, "epoch": 9.09, "learning_rate": 5.029585798816568e-06, "loss": 0.5435, "step": 10759, "task_loss": 0.4537678062915802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6611021757125854, "epoch": 9.1, "learning_rate": 5.024889640274256e-06, "loss": 0.4821, "step": 10760, "task_loss": 0.13576123118400574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27869537472724915, "epoch": 9.1, "learning_rate": 5.020193481731943e-06, "loss": 0.5909, "step": 10761, "task_loss": 0.10915900021791458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5214158892631531, "epoch": 9.1, "learning_rate": 5.0154973231896306e-06, "loss": 0.5027, "step": 10762, "task_loss": 0.8271827697753906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.410283625125885, "epoch": 9.1, "learning_rate": 5.0108011646473195e-06, "loss": 0.453, "step": 10763, "task_loss": 0.6782517433166504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4283534586429596, "epoch": 9.1, "learning_rate": 5.006105006105007e-06, "loss": 0.4515, "step": 10764, "task_loss": 0.30144602060317993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4615628719329834, "epoch": 9.1, "learning_rate": 5.001408847562694e-06, "loss": 0.6012, "step": 10765, "task_loss": 1.0345104932785034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45208173990249634, "epoch": 9.1, "learning_rate": 4.996712689020382e-06, "loss": 0.5824, "step": 10766, "task_loss": 0.47253862023353577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48497024178504944, "epoch": 9.1, "learning_rate": 4.992016530478069e-06, "loss": 0.5443, "step": 10767, "task_loss": 0.7656377553939819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5398471355438232, "epoch": 9.1, "learning_rate": 4.987320371935757e-06, "loss": 0.4873, "step": 10768, "task_loss": 0.4466264843940735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5354141592979431, "epoch": 9.1, "learning_rate": 4.982624213393444e-06, "loss": 0.5788, "step": 10769, "task_loss": 0.49572786688804626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6876121759414673, "epoch": 9.1, "learning_rate": 4.9779280548511315e-06, "loss": 0.52, "step": 10770, "task_loss": 0.25131386518478394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36163440346717834, "epoch": 9.1, "learning_rate": 4.97323189630882e-06, "loss": 0.4858, "step": 10771, "task_loss": 0.301727831363678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6130664348602295, "epoch": 9.11, "learning_rate": 4.9685357377665075e-06, "loss": 0.4711, "step": 10772, "task_loss": 1.1295173168182373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6440410017967224, "epoch": 9.11, "learning_rate": 4.963839579224195e-06, "loss": 0.4835, "step": 10773, "task_loss": 1.5382941961288452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.961685299873352, "epoch": 9.11, "learning_rate": 4.959143420681883e-06, "loss": 0.6353, "step": 10774, "task_loss": 0.9452662467956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46775880455970764, "epoch": 9.11, "learning_rate": 4.95444726213957e-06, "loss": 0.6686, "step": 10775, "task_loss": 0.6920333504676819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7484368681907654, "epoch": 9.11, "learning_rate": 4.949751103597258e-06, "loss": 0.4762, "step": 10776, "task_loss": 0.5350260734558105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.364928126335144, "epoch": 9.11, "learning_rate": 4.945054945054945e-06, "loss": 0.7386, "step": 10777, "task_loss": 1.5735704898834229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5331242084503174, "epoch": 9.11, "learning_rate": 4.940358786512632e-06, "loss": 0.6295, "step": 10778, "task_loss": 0.9346872568130493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6216869354248047, "epoch": 9.11, "learning_rate": 4.935662627970321e-06, "loss": 0.5627, "step": 10779, "task_loss": 0.9751790761947632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42407143115997314, "epoch": 9.11, "learning_rate": 4.930966469428008e-06, "loss": 0.601, "step": 10780, "task_loss": 0.9411976337432861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3426010310649872, "epoch": 9.11, "learning_rate": 4.926270310885696e-06, "loss": 0.5182, "step": 10781, "task_loss": 0.9861934781074524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5789797902107239, "epoch": 9.11, "learning_rate": 4.921574152343384e-06, "loss": 0.5182, "step": 10782, "task_loss": 1.0251779556274414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3579273819923401, "epoch": 9.11, "learning_rate": 4.916877993801071e-06, "loss": 0.395, "step": 10783, "task_loss": 0.477798193693161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5452834367752075, "epoch": 9.12, "learning_rate": 4.912181835258758e-06, "loss": 0.4049, "step": 10784, "task_loss": 0.5050028562545776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.529124915599823, "epoch": 9.12, "learning_rate": 4.907485676716446e-06, "loss": 0.5845, "step": 10785, "task_loss": 0.9175763130187988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4466211795806885, "epoch": 9.12, "learning_rate": 4.902789518174133e-06, "loss": 0.5289, "step": 10786, "task_loss": 0.303467720746994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6210918426513672, "epoch": 9.12, "learning_rate": 4.898093359631821e-06, "loss": 0.5085, "step": 10787, "task_loss": 0.7774965763092041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7344993948936462, "epoch": 9.12, "learning_rate": 4.893397201089509e-06, "loss": 0.6211, "step": 10788, "task_loss": 0.4542602300643921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.809605062007904, "epoch": 9.12, "learning_rate": 4.8887010425471965e-06, "loss": 0.6622, "step": 10789, "task_loss": 1.3373280763626099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5311959981918335, "epoch": 9.12, "learning_rate": 4.8840048840048845e-06, "loss": 0.4835, "step": 10790, "task_loss": 0.7423132061958313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.613623321056366, "epoch": 9.12, "learning_rate": 4.879308725462572e-06, "loss": 0.4774, "step": 10791, "task_loss": 0.23567743599414825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45086318254470825, "epoch": 9.12, "learning_rate": 4.874612566920259e-06, "loss": 0.4708, "step": 10792, "task_loss": 0.9438279271125793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47940754890441895, "epoch": 9.12, "learning_rate": 4.869916408377947e-06, "loss": 0.5462, "step": 10793, "task_loss": 0.9598230719566345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44011586904525757, "epoch": 9.12, "learning_rate": 4.865220249835635e-06, "loss": 0.5569, "step": 10794, "task_loss": 0.4462777376174927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2606009542942047, "epoch": 9.13, "learning_rate": 4.860524091293322e-06, "loss": 0.4314, "step": 10795, "task_loss": 1.3644864559173584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4045115113258362, "epoch": 9.13, "learning_rate": 4.85582793275101e-06, "loss": 0.553, "step": 10796, "task_loss": 0.6645439267158508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5145822763442993, "epoch": 9.13, "learning_rate": 4.851131774208697e-06, "loss": 0.5556, "step": 10797, "task_loss": 0.30203819274902344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49476826190948486, "epoch": 9.13, "learning_rate": 4.846435615666385e-06, "loss": 0.5922, "step": 10798, "task_loss": 0.7888898849487305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3187045156955719, "epoch": 9.13, "learning_rate": 4.841739457124073e-06, "loss": 0.4091, "step": 10799, "task_loss": 0.871150016784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41304677724838257, "epoch": 9.13, "learning_rate": 4.83704329858176e-06, "loss": 0.5678, "step": 10800, "task_loss": 0.39439013600349426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40353667736053467, "epoch": 9.13, "learning_rate": 4.832347140039448e-06, "loss": 0.4642, "step": 10801, "task_loss": 0.7110856175422668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6665796041488647, "epoch": 9.13, "learning_rate": 4.827650981497136e-06, "loss": 0.6285, "step": 10802, "task_loss": 1.9966217279434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.248958021402359, "epoch": 9.13, "learning_rate": 4.822954822954823e-06, "loss": 0.4531, "step": 10803, "task_loss": 0.8183040618896484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3549577295780182, "epoch": 9.13, "learning_rate": 4.818258664412511e-06, "loss": 0.4451, "step": 10804, "task_loss": 0.3022654950618744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7812880873680115, "epoch": 9.13, "learning_rate": 4.813562505870198e-06, "loss": 0.7452, "step": 10805, "task_loss": 0.7758949995040894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3770272135734558, "epoch": 9.13, "learning_rate": 4.808866347327886e-06, "loss": 0.5867, "step": 10806, "task_loss": 0.6333507895469666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4899291396141052, "epoch": 9.14, "learning_rate": 4.8041701887855735e-06, "loss": 0.4291, "step": 10807, "task_loss": 0.5349928140640259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46991729736328125, "epoch": 9.14, "learning_rate": 4.799474030243261e-06, "loss": 0.6279, "step": 10808, "task_loss": 0.45564189553260803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44061020016670227, "epoch": 9.14, "learning_rate": 4.794777871700949e-06, "loss": 0.4951, "step": 10809, "task_loss": 0.5175603032112122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5305235385894775, "epoch": 9.14, "learning_rate": 4.790081713158637e-06, "loss": 0.5871, "step": 10810, "task_loss": 0.9809455275535583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4440699815750122, "epoch": 9.14, "learning_rate": 4.785385554616324e-06, "loss": 0.4607, "step": 10811, "task_loss": 0.985060453414917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32974326610565186, "epoch": 9.14, "learning_rate": 4.780689396074012e-06, "loss": 0.4587, "step": 10812, "task_loss": 0.2247323840856552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45507708191871643, "epoch": 9.14, "learning_rate": 4.775993237531699e-06, "loss": 0.3923, "step": 10813, "task_loss": 0.2305036038160324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6834571361541748, "epoch": 9.14, "learning_rate": 4.771297078989387e-06, "loss": 0.4809, "step": 10814, "task_loss": 0.15271535515785217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6583552360534668, "epoch": 9.14, "learning_rate": 4.766600920447074e-06, "loss": 0.4429, "step": 10815, "task_loss": 1.307707667350769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38142964243888855, "epoch": 9.14, "learning_rate": 4.7619047619047615e-06, "loss": 0.3829, "step": 10816, "task_loss": 0.6284894347190857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39593613147735596, "epoch": 9.14, "learning_rate": 4.75720860336245e-06, "loss": 0.5337, "step": 10817, "task_loss": 0.7197710275650024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4704158902168274, "epoch": 9.14, "learning_rate": 4.752512444820138e-06, "loss": 0.6222, "step": 10818, "task_loss": 0.6648381948471069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21620702743530273, "epoch": 9.15, "learning_rate": 4.747816286277825e-06, "loss": 0.3483, "step": 10819, "task_loss": 0.281215637922287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5426162481307983, "epoch": 9.15, "learning_rate": 4.743120127735513e-06, "loss": 0.7216, "step": 10820, "task_loss": 0.6434688568115234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44350019097328186, "epoch": 9.15, "learning_rate": 4.7384239691932e-06, "loss": 0.4614, "step": 10821, "task_loss": 0.28932347893714905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6728520393371582, "epoch": 9.15, "learning_rate": 4.733727810650888e-06, "loss": 0.5553, "step": 10822, "task_loss": 0.5982152223587036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5577454566955566, "epoch": 9.15, "learning_rate": 4.729031652108575e-06, "loss": 0.6592, "step": 10823, "task_loss": 0.512154221534729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48018860816955566, "epoch": 9.15, "learning_rate": 4.724335493566262e-06, "loss": 0.4405, "step": 10824, "task_loss": 0.25719577074050903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9441595077514648, "epoch": 9.15, "learning_rate": 4.719639335023951e-06, "loss": 0.6368, "step": 10825, "task_loss": 0.5936612486839294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26125389337539673, "epoch": 9.15, "learning_rate": 4.7149431764816385e-06, "loss": 0.4995, "step": 10826, "task_loss": 0.23213624954223633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7618073225021362, "epoch": 9.15, "learning_rate": 4.710247017939326e-06, "loss": 0.6628, "step": 10827, "task_loss": 1.513561725616455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3622399568557739, "epoch": 9.15, "learning_rate": 4.705550859397014e-06, "loss": 0.482, "step": 10828, "task_loss": 0.3180501163005829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43298834562301636, "epoch": 9.15, "learning_rate": 4.700854700854701e-06, "loss": 0.5143, "step": 10829, "task_loss": 0.23392625153064728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6041392087936401, "epoch": 9.15, "learning_rate": 4.696158542312389e-06, "loss": 0.4914, "step": 10830, "task_loss": 2.0075674057006836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30115532875061035, "epoch": 9.16, "learning_rate": 4.691462383770076e-06, "loss": 0.5182, "step": 10831, "task_loss": 0.6879397630691528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6114578247070312, "epoch": 9.16, "learning_rate": 4.686766225227763e-06, "loss": 0.6387, "step": 10832, "task_loss": 1.0479676723480225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6310089826583862, "epoch": 9.16, "learning_rate": 4.682070066685452e-06, "loss": 0.5916, "step": 10833, "task_loss": 1.0106794834136963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43346118927001953, "epoch": 9.16, "learning_rate": 4.677373908143139e-06, "loss": 0.3961, "step": 10834, "task_loss": 0.6457552313804626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42287886142730713, "epoch": 9.16, "learning_rate": 4.6726777496008265e-06, "loss": 0.6712, "step": 10835, "task_loss": 0.5254179239273071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5991325974464417, "epoch": 9.16, "learning_rate": 4.667981591058515e-06, "loss": 0.7843, "step": 10836, "task_loss": 0.7200884819030762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8336564302444458, "epoch": 9.16, "learning_rate": 4.663285432516202e-06, "loss": 0.5139, "step": 10837, "task_loss": 1.1541225910186768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3764846920967102, "epoch": 9.16, "learning_rate": 4.65858927397389e-06, "loss": 0.5736, "step": 10838, "task_loss": 0.23537102341651917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5132734775543213, "epoch": 9.16, "learning_rate": 4.653893115431577e-06, "loss": 0.5742, "step": 10839, "task_loss": 0.41589945554733276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7671801447868347, "epoch": 9.16, "learning_rate": 4.649196956889264e-06, "loss": 0.5647, "step": 10840, "task_loss": 0.5173789262771606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5668511390686035, "epoch": 9.16, "learning_rate": 4.644500798346953e-06, "loss": 0.6226, "step": 10841, "task_loss": 0.6880378127098083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3632248044013977, "epoch": 9.16, "learning_rate": 4.63980463980464e-06, "loss": 0.6327, "step": 10842, "task_loss": 0.3302665948867798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6125246286392212, "epoch": 9.17, "learning_rate": 4.6351084812623274e-06, "loss": 0.4758, "step": 10843, "task_loss": 1.1953169107437134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6988461017608643, "epoch": 9.17, "learning_rate": 4.6304123227200155e-06, "loss": 0.7066, "step": 10844, "task_loss": 0.4654392898082733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3700801730155945, "epoch": 9.17, "learning_rate": 4.625716164177703e-06, "loss": 0.6171, "step": 10845, "task_loss": 0.251496285200119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6136988401412964, "epoch": 9.17, "learning_rate": 4.621020005635391e-06, "loss": 0.5257, "step": 10846, "task_loss": 0.773097038269043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5662164688110352, "epoch": 9.17, "learning_rate": 4.616323847093078e-06, "loss": 0.4918, "step": 10847, "task_loss": 0.8952475190162659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35259389877319336, "epoch": 9.17, "learning_rate": 4.611627688550766e-06, "loss": 0.3655, "step": 10848, "task_loss": 0.4675857424736023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5494815111160278, "epoch": 9.17, "learning_rate": 4.606931530008454e-06, "loss": 0.4753, "step": 10849, "task_loss": 0.35709148645401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5236245393753052, "epoch": 9.17, "learning_rate": 4.602235371466141e-06, "loss": 0.5202, "step": 10850, "task_loss": 1.0402954816818237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4620223343372345, "epoch": 9.17, "learning_rate": 4.597539212923828e-06, "loss": 0.4793, "step": 10851, "task_loss": 0.6145092248916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7146470546722412, "epoch": 9.17, "learning_rate": 4.592843054381516e-06, "loss": 0.5245, "step": 10852, "task_loss": 0.8255802989006042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4474940299987793, "epoch": 9.17, "learning_rate": 4.5881468958392035e-06, "loss": 0.5972, "step": 10853, "task_loss": 1.2063347101211548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7762824296951294, "epoch": 9.17, "learning_rate": 4.5834507372968916e-06, "loss": 0.6269, "step": 10854, "task_loss": 0.8162869215011597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4945563077926636, "epoch": 9.18, "learning_rate": 4.578754578754579e-06, "loss": 0.5613, "step": 10855, "task_loss": 1.1555442810058594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5933083295822144, "epoch": 9.18, "learning_rate": 4.574058420212267e-06, "loss": 0.5415, "step": 10856, "task_loss": 1.3540757894515991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42897528409957886, "epoch": 9.18, "learning_rate": 4.569362261669955e-06, "loss": 0.5136, "step": 10857, "task_loss": 0.1611291766166687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5386508703231812, "epoch": 9.18, "learning_rate": 4.564666103127642e-06, "loss": 0.5027, "step": 10858, "task_loss": 0.3474172353744507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29609376192092896, "epoch": 9.18, "learning_rate": 4.559969944585329e-06, "loss": 0.4124, "step": 10859, "task_loss": 0.4425196051597595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27288979291915894, "epoch": 9.18, "learning_rate": 4.555273786043017e-06, "loss": 0.4163, "step": 10860, "task_loss": 0.5429377555847168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3799418807029724, "epoch": 9.18, "learning_rate": 4.550577627500704e-06, "loss": 0.5034, "step": 10861, "task_loss": 0.3593513071537018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4575740098953247, "epoch": 9.18, "learning_rate": 4.545881468958392e-06, "loss": 0.5253, "step": 10862, "task_loss": 0.19998013973236084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4829702377319336, "epoch": 9.18, "learning_rate": 4.54118531041608e-06, "loss": 0.4202, "step": 10863, "task_loss": 0.43867790699005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3672296702861786, "epoch": 9.18, "learning_rate": 4.536489151873768e-06, "loss": 0.494, "step": 10864, "task_loss": 1.3404605388641357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6341596841812134, "epoch": 9.18, "learning_rate": 4.531792993331455e-06, "loss": 0.5524, "step": 10865, "task_loss": 0.7552019357681274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42355993390083313, "epoch": 9.19, "learning_rate": 4.527096834789143e-06, "loss": 0.4507, "step": 10866, "task_loss": 1.6136534214019775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2771432399749756, "epoch": 9.19, "learning_rate": 4.52240067624683e-06, "loss": 0.4926, "step": 10867, "task_loss": 0.5457552671432495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4583578109741211, "epoch": 9.19, "learning_rate": 4.517704517704518e-06, "loss": 0.4873, "step": 10868, "task_loss": 0.5047423243522644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.710797905921936, "epoch": 9.19, "learning_rate": 4.513008359162205e-06, "loss": 0.5301, "step": 10869, "task_loss": 0.6324367523193359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6359922885894775, "epoch": 9.19, "learning_rate": 4.5083122006198925e-06, "loss": 0.5434, "step": 10870, "task_loss": 0.94767826795578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5624160766601562, "epoch": 9.19, "learning_rate": 4.503616042077581e-06, "loss": 0.5864, "step": 10871, "task_loss": 1.2750481367111206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5518039464950562, "epoch": 9.19, "learning_rate": 4.4989198835352685e-06, "loss": 0.6828, "step": 10872, "task_loss": 1.090867280960083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28236958384513855, "epoch": 9.19, "learning_rate": 4.494223724992956e-06, "loss": 0.5054, "step": 10873, "task_loss": 0.18523308634757996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3802758753299713, "epoch": 9.19, "learning_rate": 4.489527566450644e-06, "loss": 0.5124, "step": 10874, "task_loss": 1.24318265914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4001586139202118, "epoch": 9.19, "learning_rate": 4.484831407908331e-06, "loss": 0.475, "step": 10875, "task_loss": 0.4499785006046295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37857383489608765, "epoch": 9.19, "learning_rate": 4.480135249366019e-06, "loss": 0.6017, "step": 10876, "task_loss": 1.0561765432357788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5126084685325623, "epoch": 9.19, "learning_rate": 4.475439090823706e-06, "loss": 0.4688, "step": 10877, "task_loss": 0.47618067264556885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37914741039276123, "epoch": 9.2, "learning_rate": 4.470742932281393e-06, "loss": 0.56, "step": 10878, "task_loss": 0.12554247677326202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.710652768611908, "epoch": 9.2, "learning_rate": 4.466046773739082e-06, "loss": 0.5488, "step": 10879, "task_loss": 1.8915144205093384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4023880362510681, "epoch": 9.2, "learning_rate": 4.4613506151967694e-06, "loss": 0.5273, "step": 10880, "task_loss": 0.14976464211940765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46554404497146606, "epoch": 9.2, "learning_rate": 4.456654456654457e-06, "loss": 0.4943, "step": 10881, "task_loss": 0.4651232659816742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5860620141029358, "epoch": 9.2, "learning_rate": 4.451958298112145e-06, "loss": 0.5371, "step": 10882, "task_loss": 0.7723259925842285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6260709166526794, "epoch": 9.2, "learning_rate": 4.447262139569832e-06, "loss": 0.6055, "step": 10883, "task_loss": 0.8701730966567993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6279199123382568, "epoch": 9.2, "learning_rate": 4.44256598102752e-06, "loss": 0.5566, "step": 10884, "task_loss": 0.687610924243927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33580493927001953, "epoch": 9.2, "learning_rate": 4.437869822485207e-06, "loss": 0.5136, "step": 10885, "task_loss": 0.07880933582782745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33558982610702515, "epoch": 9.2, "learning_rate": 4.433173663942894e-06, "loss": 0.5686, "step": 10886, "task_loss": 0.630238950252533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7806136012077332, "epoch": 9.2, "learning_rate": 4.428477505400583e-06, "loss": 0.5781, "step": 10887, "task_loss": 0.8771222829818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.581695556640625, "epoch": 9.2, "learning_rate": 4.42378134685827e-06, "loss": 0.5603, "step": 10888, "task_loss": 1.5130608081817627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5424028038978577, "epoch": 9.2, "learning_rate": 4.4190851883159575e-06, "loss": 0.5751, "step": 10889, "task_loss": 0.41587764024734497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4843243360519409, "epoch": 9.21, "learning_rate": 4.4143890297736455e-06, "loss": 0.5861, "step": 10890, "task_loss": 1.1868025064468384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4884148836135864, "epoch": 9.21, "learning_rate": 4.409692871231333e-06, "loss": 0.5578, "step": 10891, "task_loss": 0.32005393505096436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2815285921096802, "epoch": 9.21, "learning_rate": 4.404996712689021e-06, "loss": 0.6753, "step": 10892, "task_loss": 0.834860622882843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.885529637336731, "epoch": 9.21, "learning_rate": 4.400300554146708e-06, "loss": 0.572, "step": 10893, "task_loss": 1.05112624168396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4920681118965149, "epoch": 9.21, "learning_rate": 4.395604395604396e-06, "loss": 0.4665, "step": 10894, "task_loss": 0.6899709701538086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46480298042297363, "epoch": 9.21, "learning_rate": 4.390908237062084e-06, "loss": 0.4772, "step": 10895, "task_loss": 0.31658485531806946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43898552656173706, "epoch": 9.21, "learning_rate": 4.386212078519771e-06, "loss": 0.5895, "step": 10896, "task_loss": 0.9232686758041382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5286633968353271, "epoch": 9.21, "learning_rate": 4.381515919977458e-06, "loss": 0.3755, "step": 10897, "task_loss": 0.4323328733444214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5468287467956543, "epoch": 9.21, "learning_rate": 4.376819761435146e-06, "loss": 0.4656, "step": 10898, "task_loss": 0.36354464292526245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7020459175109863, "epoch": 9.21, "learning_rate": 4.372123602892834e-06, "loss": 0.6185, "step": 10899, "task_loss": 1.2318261861801147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5509000420570374, "epoch": 9.21, "learning_rate": 4.367427444350522e-06, "loss": 0.5089, "step": 10900, "task_loss": 1.041623592376709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42106324434280396, "epoch": 9.21, "learning_rate": 4.362731285808209e-06, "loss": 0.5485, "step": 10901, "task_loss": 0.955953061580658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.439687043428421, "epoch": 9.22, "learning_rate": 4.358035127265897e-06, "loss": 0.502, "step": 10902, "task_loss": 0.5711559057235718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3878926634788513, "epoch": 9.22, "learning_rate": 4.353338968723585e-06, "loss": 0.5809, "step": 10903, "task_loss": 0.3408116102218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4243980348110199, "epoch": 9.22, "learning_rate": 4.348642810181272e-06, "loss": 0.5568, "step": 10904, "task_loss": 0.35001444816589355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3836745023727417, "epoch": 9.22, "learning_rate": 4.343946651638959e-06, "loss": 0.5257, "step": 10905, "task_loss": 0.6191925406455994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3753714859485626, "epoch": 9.22, "learning_rate": 4.339250493096647e-06, "loss": 0.4475, "step": 10906, "task_loss": 0.874995231628418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5447896122932434, "epoch": 9.22, "learning_rate": 4.3345543345543345e-06, "loss": 0.5972, "step": 10907, "task_loss": 0.7510464787483215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3877883851528168, "epoch": 9.22, "learning_rate": 4.3298581760120225e-06, "loss": 0.4025, "step": 10908, "task_loss": 0.5335994362831116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7354617714881897, "epoch": 9.22, "learning_rate": 4.32516201746971e-06, "loss": 0.5601, "step": 10909, "task_loss": 1.0108484029769897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4778059720993042, "epoch": 9.22, "learning_rate": 4.320465858927398e-06, "loss": 0.5548, "step": 10910, "task_loss": 1.0227872133255005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4960688352584839, "epoch": 9.22, "learning_rate": 4.315769700385086e-06, "loss": 0.5233, "step": 10911, "task_loss": 0.9513017535209656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42641371488571167, "epoch": 9.22, "learning_rate": 4.311073541842773e-06, "loss": 0.5213, "step": 10912, "task_loss": 0.509032666683197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4839319884777069, "epoch": 9.22, "learning_rate": 4.30637738330046e-06, "loss": 0.5091, "step": 10913, "task_loss": 0.653652548789978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45131999254226685, "epoch": 9.23, "learning_rate": 4.301681224758148e-06, "loss": 0.5556, "step": 10914, "task_loss": 1.0802311897277832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32186490297317505, "epoch": 9.23, "learning_rate": 4.296985066215835e-06, "loss": 0.3757, "step": 10915, "task_loss": 0.617271900177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8242195844650269, "epoch": 9.23, "learning_rate": 4.292288907673523e-06, "loss": 0.5648, "step": 10916, "task_loss": 1.2773592472076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6480286121368408, "epoch": 9.23, "learning_rate": 4.287592749131211e-06, "loss": 0.5493, "step": 10917, "task_loss": 1.0887995958328247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4379591941833496, "epoch": 9.23, "learning_rate": 4.282896590588899e-06, "loss": 0.5968, "step": 10918, "task_loss": 0.30384403467178345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47425395250320435, "epoch": 9.23, "learning_rate": 4.278200432046587e-06, "loss": 0.5302, "step": 10919, "task_loss": 1.0378401279449463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.68454909324646, "epoch": 9.23, "learning_rate": 4.273504273504274e-06, "loss": 0.5199, "step": 10920, "task_loss": 0.7695238590240479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5275671482086182, "epoch": 9.23, "learning_rate": 4.268808114961961e-06, "loss": 0.446, "step": 10921, "task_loss": 0.7664257287979126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5585132241249084, "epoch": 9.23, "learning_rate": 4.264111956419649e-06, "loss": 0.6194, "step": 10922, "task_loss": 0.7371728420257568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8217506408691406, "epoch": 9.23, "learning_rate": 4.259415797877336e-06, "loss": 0.6707, "step": 10923, "task_loss": 1.3436232805252075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.19976487755775452, "epoch": 9.23, "learning_rate": 4.254719639335024e-06, "loss": 0.3468, "step": 10924, "task_loss": 0.20512662827968597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.54300457239151, "epoch": 9.23, "learning_rate": 4.250023480792712e-06, "loss": 0.4533, "step": 10925, "task_loss": 0.9582623243331909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.457436740398407, "epoch": 9.24, "learning_rate": 4.2453273222503995e-06, "loss": 0.4931, "step": 10926, "task_loss": 0.7386743426322937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5815085768699646, "epoch": 9.24, "learning_rate": 4.2406311637080875e-06, "loss": 0.3916, "step": 10927, "task_loss": 0.12617357075214386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.498002827167511, "epoch": 9.24, "learning_rate": 4.235935005165775e-06, "loss": 0.4978, "step": 10928, "task_loss": 0.7038575410842896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3074437975883484, "epoch": 9.24, "learning_rate": 4.231238846623462e-06, "loss": 0.3831, "step": 10929, "task_loss": 0.2876688539981842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4183152914047241, "epoch": 9.24, "learning_rate": 4.22654268808115e-06, "loss": 0.5614, "step": 10930, "task_loss": 0.5910884141921997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5642260909080505, "epoch": 9.24, "learning_rate": 4.221846529538837e-06, "loss": 0.5072, "step": 10931, "task_loss": 1.3438327312469482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5410130620002747, "epoch": 9.24, "learning_rate": 4.217150370996525e-06, "loss": 0.5565, "step": 10932, "task_loss": 1.1669981479644775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4439947009086609, "epoch": 9.24, "learning_rate": 4.212454212454213e-06, "loss": 0.4057, "step": 10933, "task_loss": 0.6332880258560181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3508094549179077, "epoch": 9.24, "learning_rate": 4.2077580539119e-06, "loss": 0.3698, "step": 10934, "task_loss": 0.5008771419525146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.358085036277771, "epoch": 9.24, "learning_rate": 4.203061895369588e-06, "loss": 0.4411, "step": 10935, "task_loss": 0.6771661639213562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9107546806335449, "epoch": 9.24, "learning_rate": 4.198365736827276e-06, "loss": 0.6343, "step": 10936, "task_loss": 0.8680806159973145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.723254919052124, "epoch": 9.24, "learning_rate": 4.193669578284963e-06, "loss": 0.5474, "step": 10937, "task_loss": 0.8167060613632202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6148167848587036, "epoch": 9.25, "learning_rate": 4.188973419742651e-06, "loss": 0.5835, "step": 10938, "task_loss": 0.45713505148887634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0441827774047852, "epoch": 9.25, "learning_rate": 4.184277261200338e-06, "loss": 0.6824, "step": 10939, "task_loss": 1.0055031776428223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6809828281402588, "epoch": 9.25, "learning_rate": 4.179581102658025e-06, "loss": 0.5759, "step": 10940, "task_loss": 0.33073368668556213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6073811650276184, "epoch": 9.25, "learning_rate": 4.174884944115714e-06, "loss": 0.5474, "step": 10941, "task_loss": 1.5066643953323364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3849838376045227, "epoch": 9.25, "learning_rate": 4.170188785573401e-06, "loss": 0.4798, "step": 10942, "task_loss": 0.46167248487472534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5495039224624634, "epoch": 9.25, "learning_rate": 4.1654926270310885e-06, "loss": 0.4352, "step": 10943, "task_loss": 0.8963119983673096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4797786474227905, "epoch": 9.25, "learning_rate": 4.1607964684887765e-06, "loss": 0.4445, "step": 10944, "task_loss": 0.33392512798309326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9741760492324829, "epoch": 9.25, "learning_rate": 4.156100309946464e-06, "loss": 0.5784, "step": 10945, "task_loss": 1.3701093196868896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7125933170318604, "epoch": 9.25, "learning_rate": 4.151404151404152e-06, "loss": 0.642, "step": 10946, "task_loss": 1.0760340690612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4682626724243164, "epoch": 9.25, "learning_rate": 4.146707992861839e-06, "loss": 0.5261, "step": 10947, "task_loss": 0.6496219635009766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7940568327903748, "epoch": 9.25, "learning_rate": 4.142011834319527e-06, "loss": 0.5784, "step": 10948, "task_loss": 1.4794553518295288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4707946181297302, "epoch": 9.26, "learning_rate": 4.137315675777215e-06, "loss": 0.5205, "step": 10949, "task_loss": 1.4109915494918823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.25691619515419006, "epoch": 9.26, "learning_rate": 4.132619517234902e-06, "loss": 0.5237, "step": 10950, "task_loss": 0.9998692274093628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38530975580215454, "epoch": 9.26, "learning_rate": 4.127923358692589e-06, "loss": 0.6966, "step": 10951, "task_loss": 0.534818172454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4488954544067383, "epoch": 9.26, "learning_rate": 4.123227200150277e-06, "loss": 0.4167, "step": 10952, "task_loss": 1.065750241279602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46248558163642883, "epoch": 9.26, "learning_rate": 4.1185310416079646e-06, "loss": 0.6371, "step": 10953, "task_loss": 0.7042678594589233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43129611015319824, "epoch": 9.26, "learning_rate": 4.113834883065653e-06, "loss": 0.5673, "step": 10954, "task_loss": 1.0320563316345215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2740740478038788, "epoch": 9.26, "learning_rate": 4.10913872452334e-06, "loss": 0.4364, "step": 10955, "task_loss": 0.8866948485374451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4280799329280853, "epoch": 9.26, "learning_rate": 4.104442565981028e-06, "loss": 0.4474, "step": 10956, "task_loss": 1.557889461517334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4958016872406006, "epoch": 9.26, "learning_rate": 4.099746407438716e-06, "loss": 0.6632, "step": 10957, "task_loss": 0.5678724646568298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3938402235507965, "epoch": 9.26, "learning_rate": 4.095050248896403e-06, "loss": 0.5745, "step": 10958, "task_loss": 0.23073500394821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6232110261917114, "epoch": 9.26, "learning_rate": 4.09035409035409e-06, "loss": 0.5918, "step": 10959, "task_loss": 0.1126079261302948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46373867988586426, "epoch": 9.26, "learning_rate": 4.085657931811778e-06, "loss": 0.463, "step": 10960, "task_loss": 0.19222627580165863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4513087570667267, "epoch": 9.27, "learning_rate": 4.0809617732694654e-06, "loss": 0.5304, "step": 10961, "task_loss": 1.2493211030960083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4008535146713257, "epoch": 9.27, "learning_rate": 4.0762656147271535e-06, "loss": 0.4612, "step": 10962, "task_loss": 0.200352281332016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.655732274055481, "epoch": 9.27, "learning_rate": 4.071569456184841e-06, "loss": 0.4995, "step": 10963, "task_loss": 0.6046756505966187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6062896251678467, "epoch": 9.27, "learning_rate": 4.066873297642529e-06, "loss": 0.4436, "step": 10964, "task_loss": 0.5521926879882812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34697747230529785, "epoch": 9.27, "learning_rate": 4.062177139100217e-06, "loss": 0.5301, "step": 10965, "task_loss": 0.5511643290519714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5486181378364563, "epoch": 9.27, "learning_rate": 4.057480980557904e-06, "loss": 0.606, "step": 10966, "task_loss": 0.97260981798172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7581465244293213, "epoch": 9.27, "learning_rate": 4.052784822015591e-06, "loss": 0.4871, "step": 10967, "task_loss": 0.7063884735107422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6156717538833618, "epoch": 9.27, "learning_rate": 4.048088663473279e-06, "loss": 0.6359, "step": 10968, "task_loss": 1.002740502357483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.582972526550293, "epoch": 9.27, "learning_rate": 4.043392504930966e-06, "loss": 0.4933, "step": 10969, "task_loss": 1.047956109046936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46019911766052246, "epoch": 9.27, "learning_rate": 4.038696346388654e-06, "loss": 0.5371, "step": 10970, "task_loss": 0.5463063716888428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3875397741794586, "epoch": 9.27, "learning_rate": 4.034000187846342e-06, "loss": 0.4459, "step": 10971, "task_loss": 1.2539654970169067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28282636404037476, "epoch": 9.27, "learning_rate": 4.0293040293040296e-06, "loss": 0.473, "step": 10972, "task_loss": 0.16360482573509216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3565637171268463, "epoch": 9.28, "learning_rate": 4.024607870761718e-06, "loss": 0.4225, "step": 10973, "task_loss": 0.416798859834671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35586947202682495, "epoch": 9.28, "learning_rate": 4.019911712219405e-06, "loss": 0.4583, "step": 10974, "task_loss": 0.3219599723815918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6135491728782654, "epoch": 9.28, "learning_rate": 4.015215553677092e-06, "loss": 0.5756, "step": 10975, "task_loss": 1.1901389360427856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7292443513870239, "epoch": 9.28, "learning_rate": 4.01051939513478e-06, "loss": 0.604, "step": 10976, "task_loss": 0.6044957041740417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5190645456314087, "epoch": 9.28, "learning_rate": 4.005823236592467e-06, "loss": 0.4235, "step": 10977, "task_loss": 0.5378069281578064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3543566167354584, "epoch": 9.28, "learning_rate": 4.001127078050155e-06, "loss": 0.5649, "step": 10978, "task_loss": 0.5323916077613831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5802221298217773, "epoch": 9.28, "learning_rate": 3.996430919507843e-06, "loss": 0.48, "step": 10979, "task_loss": 0.9291883707046509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4898128807544708, "epoch": 9.28, "learning_rate": 3.9917347609655305e-06, "loss": 0.458, "step": 10980, "task_loss": 0.6900172829627991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9053933620452881, "epoch": 9.28, "learning_rate": 3.9870386024232185e-06, "loss": 0.5724, "step": 10981, "task_loss": 2.1028645038604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43069595098495483, "epoch": 9.28, "learning_rate": 3.982342443880906e-06, "loss": 0.509, "step": 10982, "task_loss": 0.2197617143392563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8263747096061707, "epoch": 9.28, "learning_rate": 3.977646285338593e-06, "loss": 0.6964, "step": 10983, "task_loss": 1.431894063949585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5364647507667542, "epoch": 9.28, "learning_rate": 3.972950126796281e-06, "loss": 0.4614, "step": 10984, "task_loss": 1.0317654609680176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.1934036761522293, "epoch": 9.29, "learning_rate": 3.968253968253968e-06, "loss": 0.4796, "step": 10985, "task_loss": 0.18156766891479492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.521465003490448, "epoch": 9.29, "learning_rate": 3.963557809711656e-06, "loss": 0.5159, "step": 10986, "task_loss": 0.3325605094432831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6312697529792786, "epoch": 9.29, "learning_rate": 3.958861651169344e-06, "loss": 0.6173, "step": 10987, "task_loss": 0.7861542701721191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3511209487915039, "epoch": 9.29, "learning_rate": 3.954165492627031e-06, "loss": 0.4679, "step": 10988, "task_loss": 0.6875053644180298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3111460208892822, "epoch": 9.29, "learning_rate": 3.949469334084719e-06, "loss": 0.591, "step": 10989, "task_loss": 0.019017482176423073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33890706300735474, "epoch": 9.29, "learning_rate": 3.9447731755424066e-06, "loss": 0.6341, "step": 10990, "task_loss": 0.5327935814857483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47418761253356934, "epoch": 9.29, "learning_rate": 3.940077017000094e-06, "loss": 0.5198, "step": 10991, "task_loss": 1.358612298965454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.289068341255188, "epoch": 9.29, "learning_rate": 3.935380858457782e-06, "loss": 0.5549, "step": 10992, "task_loss": 0.30642926692962646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7989042401313782, "epoch": 9.29, "learning_rate": 3.930684699915469e-06, "loss": 0.5866, "step": 10993, "task_loss": 1.0353713035583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3630967140197754, "epoch": 9.29, "learning_rate": 3.925988541373157e-06, "loss": 0.4501, "step": 10994, "task_loss": 0.6484398245811462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0485355854034424, "epoch": 9.29, "learning_rate": 3.921292382830845e-06, "loss": 0.5237, "step": 10995, "task_loss": 0.8715893626213074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38944506645202637, "epoch": 9.29, "learning_rate": 3.916596224288532e-06, "loss": 0.4205, "step": 10996, "task_loss": 0.4776269793510437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5434913635253906, "epoch": 9.3, "learning_rate": 3.91190006574622e-06, "loss": 0.6227, "step": 10997, "task_loss": 0.8531421422958374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5688161849975586, "epoch": 9.3, "learning_rate": 3.9072039072039074e-06, "loss": 0.4585, "step": 10998, "task_loss": 1.0414435863494873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5777052640914917, "epoch": 9.3, "learning_rate": 3.902507748661595e-06, "loss": 0.3845, "step": 10999, "task_loss": 0.1922408640384674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6573265790939331, "epoch": 9.3, "learning_rate": 3.897811590119283e-06, "loss": 0.5442, "step": 11000, "task_loss": 0.37191492319107056 }, { "epoch": 9.3, "eval_accuracy": 0.9053069306930693, "eval_loss": 0.346221923828125, "eval_runtime": 224.1149, "eval_samples_per_second": 112.665, "eval_steps_per_second": 0.883, "step": 11000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7361946105957031, "epoch": 9.3, "learning_rate": 3.89311543157697e-06, "loss": 0.6245, "step": 11001, "task_loss": 0.4143327474594116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5425702333450317, "epoch": 9.3, "learning_rate": 3.888419273034658e-06, "loss": 0.5476, "step": 11002, "task_loss": 0.9952729344367981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4616251289844513, "epoch": 9.3, "learning_rate": 3.883723114492346e-06, "loss": 0.5346, "step": 11003, "task_loss": 1.0486862659454346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9784385561943054, "epoch": 9.3, "learning_rate": 3.879026955950033e-06, "loss": 0.5333, "step": 11004, "task_loss": 1.182749629020691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38427016139030457, "epoch": 9.3, "learning_rate": 3.874330797407721e-06, "loss": 0.4942, "step": 11005, "task_loss": 0.562197208404541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6125396490097046, "epoch": 9.3, "learning_rate": 3.869634638865408e-06, "loss": 0.5654, "step": 11006, "task_loss": 0.21159914135932922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9802566766738892, "epoch": 9.3, "learning_rate": 3.8649384803230955e-06, "loss": 0.7256, "step": 11007, "task_loss": 0.8843002319335938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48365503549575806, "epoch": 9.3, "learning_rate": 3.8602423217807835e-06, "loss": 0.4109, "step": 11008, "task_loss": 0.7952879667282104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.25382012128829956, "epoch": 9.31, "learning_rate": 3.855546163238471e-06, "loss": 0.4747, "step": 11009, "task_loss": 0.4312363862991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48168832063674927, "epoch": 9.31, "learning_rate": 3.850850004696159e-06, "loss": 0.4687, "step": 11010, "task_loss": 0.6604706645011902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36046215891838074, "epoch": 9.31, "learning_rate": 3.846153846153847e-06, "loss": 0.5504, "step": 11011, "task_loss": 0.2108384668827057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7622984647750854, "epoch": 9.31, "learning_rate": 3.841457687611534e-06, "loss": 0.5724, "step": 11012, "task_loss": 0.4347253143787384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38085782527923584, "epoch": 9.31, "learning_rate": 3.836761529069222e-06, "loss": 0.4553, "step": 11013, "task_loss": 0.15617288649082184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6770353317260742, "epoch": 9.31, "learning_rate": 3.832065370526909e-06, "loss": 0.5324, "step": 11014, "task_loss": 0.48535141348838806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41669005155563354, "epoch": 9.31, "learning_rate": 3.827369211984596e-06, "loss": 0.5044, "step": 11015, "task_loss": 0.8084886074066162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4541943073272705, "epoch": 9.31, "learning_rate": 3.822673053442284e-06, "loss": 0.5276, "step": 11016, "task_loss": 0.7575679421424866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49252888560295105, "epoch": 9.31, "learning_rate": 3.817976894899972e-06, "loss": 0.5284, "step": 11017, "task_loss": 0.6332487463951111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48284557461738586, "epoch": 9.31, "learning_rate": 3.81328073635766e-06, "loss": 0.491, "step": 11018, "task_loss": 1.1853697299957275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3176674246788025, "epoch": 9.31, "learning_rate": 3.8085845778153473e-06, "loss": 0.4627, "step": 11019, "task_loss": 0.6629073023796082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6326193809509277, "epoch": 9.32, "learning_rate": 3.803888419273035e-06, "loss": 0.601, "step": 11020, "task_loss": 0.813741147518158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4399182200431824, "epoch": 9.32, "learning_rate": 3.7991922607307225e-06, "loss": 0.4854, "step": 11021, "task_loss": 0.4467892646789551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39014315605163574, "epoch": 9.32, "learning_rate": 3.79449610218841e-06, "loss": 0.5295, "step": 11022, "task_loss": 0.6553146839141846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6033489108085632, "epoch": 9.32, "learning_rate": 3.7897999436460977e-06, "loss": 0.445, "step": 11023, "task_loss": 0.2966165542602539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1739720106124878, "epoch": 9.32, "learning_rate": 3.785103785103785e-06, "loss": 0.7361, "step": 11024, "task_loss": 1.3811548948287964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8864316940307617, "epoch": 9.32, "learning_rate": 3.7804076265614733e-06, "loss": 0.5092, "step": 11025, "task_loss": 1.2984658479690552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38166162371635437, "epoch": 9.32, "learning_rate": 3.775711468019161e-06, "loss": 0.6111, "step": 11026, "task_loss": 0.8544777631759644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5775938630104065, "epoch": 9.32, "learning_rate": 3.771015309476848e-06, "loss": 0.4744, "step": 11027, "task_loss": 1.1608219146728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7698391675949097, "epoch": 9.32, "learning_rate": 3.7663191509345357e-06, "loss": 0.5638, "step": 11028, "task_loss": 0.9132817983627319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31351619958877563, "epoch": 9.32, "learning_rate": 3.7616229923922234e-06, "loss": 0.5295, "step": 11029, "task_loss": 1.0117034912109375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7270700931549072, "epoch": 9.32, "learning_rate": 3.756926833849911e-06, "loss": 0.4799, "step": 11030, "task_loss": 0.6803945302963257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5998459458351135, "epoch": 9.32, "learning_rate": 3.7522306753075986e-06, "loss": 0.5915, "step": 11031, "task_loss": 0.45643308758735657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4621542692184448, "epoch": 9.33, "learning_rate": 3.7475345167652858e-06, "loss": 0.5089, "step": 11032, "task_loss": 1.1074153184890747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4240649342536926, "epoch": 9.33, "learning_rate": 3.7428383582229742e-06, "loss": 0.5292, "step": 11033, "task_loss": 0.43651676177978516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7895039916038513, "epoch": 9.33, "learning_rate": 3.738142199680662e-06, "loss": 0.7574, "step": 11034, "task_loss": 0.6352007389068604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5958175659179688, "epoch": 9.33, "learning_rate": 3.733446041138349e-06, "loss": 0.4418, "step": 11035, "task_loss": 1.1429487466812134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.446782648563385, "epoch": 9.33, "learning_rate": 3.7287498825960366e-06, "loss": 0.5509, "step": 11036, "task_loss": 0.8445690274238586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27880439162254333, "epoch": 9.33, "learning_rate": 3.7240537240537242e-06, "loss": 0.4049, "step": 11037, "task_loss": 0.3872423470020294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7785201072692871, "epoch": 9.33, "learning_rate": 3.719357565511412e-06, "loss": 0.4979, "step": 11038, "task_loss": 1.2808732986450195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5452598333358765, "epoch": 9.33, "learning_rate": 3.714661406969099e-06, "loss": 0.5235, "step": 11039, "task_loss": 0.7234621047973633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6075134873390198, "epoch": 9.33, "learning_rate": 3.7099652484267866e-06, "loss": 0.5602, "step": 11040, "task_loss": 0.8149653077125549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35684067010879517, "epoch": 9.33, "learning_rate": 3.705269089884475e-06, "loss": 0.549, "step": 11041, "task_loss": 0.5597811937332153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5037657022476196, "epoch": 9.33, "learning_rate": 3.7005729313421623e-06, "loss": 0.5632, "step": 11042, "task_loss": 0.5908714532852173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31641560792922974, "epoch": 9.33, "learning_rate": 3.69587677279985e-06, "loss": 0.596, "step": 11043, "task_loss": 1.2574286460876465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5023374557495117, "epoch": 9.34, "learning_rate": 3.6911806142575375e-06, "loss": 0.6005, "step": 11044, "task_loss": 0.937214195728302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45589226484298706, "epoch": 9.34, "learning_rate": 3.686484455715225e-06, "loss": 0.5517, "step": 11045, "task_loss": 0.9174095988273621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.612249493598938, "epoch": 9.34, "learning_rate": 3.6817882971729127e-06, "loss": 0.6212, "step": 11046, "task_loss": 0.4904176592826843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6072487831115723, "epoch": 9.34, "learning_rate": 3.6770921386306e-06, "loss": 0.5186, "step": 11047, "task_loss": 0.4403320848941803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4661913812160492, "epoch": 9.34, "learning_rate": 3.6723959800882875e-06, "loss": 0.5492, "step": 11048, "task_loss": 0.4437686502933502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37105125188827515, "epoch": 9.34, "learning_rate": 3.667699821545976e-06, "loss": 0.4962, "step": 11049, "task_loss": 0.36197903752326965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42853355407714844, "epoch": 9.34, "learning_rate": 3.663003663003663e-06, "loss": 0.4361, "step": 11050, "task_loss": 0.5268529057502747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.73084956407547, "epoch": 9.34, "learning_rate": 3.6583075044613508e-06, "loss": 0.6927, "step": 11051, "task_loss": 0.09867341816425323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41655659675598145, "epoch": 9.34, "learning_rate": 3.6536113459190384e-06, "loss": 0.7253, "step": 11052, "task_loss": 0.5291652083396912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5433781147003174, "epoch": 9.34, "learning_rate": 3.648915187376726e-06, "loss": 0.4825, "step": 11053, "task_loss": 0.5281195640563965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5543678402900696, "epoch": 9.34, "learning_rate": 3.6442190288344136e-06, "loss": 0.5127, "step": 11054, "task_loss": 0.39070481061935425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7588258385658264, "epoch": 9.34, "learning_rate": 3.639522870292101e-06, "loss": 0.5411, "step": 11055, "task_loss": 0.6955853700637817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5216155052185059, "epoch": 9.35, "learning_rate": 3.6348267117497893e-06, "loss": 0.6253, "step": 11056, "task_loss": 1.521761178970337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6779745817184448, "epoch": 9.35, "learning_rate": 3.630130553207477e-06, "loss": 0.592, "step": 11057, "task_loss": 1.1219968795776367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3608488440513611, "epoch": 9.35, "learning_rate": 3.625434394665164e-06, "loss": 0.396, "step": 11058, "task_loss": 0.29369691014289856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35975295305252075, "epoch": 9.35, "learning_rate": 3.6207382361228517e-06, "loss": 0.4427, "step": 11059, "task_loss": 1.428280234336853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4416961073875427, "epoch": 9.35, "learning_rate": 3.6160420775805393e-06, "loss": 0.5983, "step": 11060, "task_loss": 0.3952958285808563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6269348859786987, "epoch": 9.35, "learning_rate": 3.611345919038227e-06, "loss": 0.5736, "step": 11061, "task_loss": 0.62345951795578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3874734044075012, "epoch": 9.35, "learning_rate": 3.6066497604959145e-06, "loss": 0.5013, "step": 11062, "task_loss": 0.4433784484863281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.564874529838562, "epoch": 9.35, "learning_rate": 3.6019536019536017e-06, "loss": 0.4676, "step": 11063, "task_loss": 0.8545567989349365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5065994262695312, "epoch": 9.35, "learning_rate": 3.59725744341129e-06, "loss": 0.5122, "step": 11064, "task_loss": 0.7616437673568726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3405569791793823, "epoch": 9.35, "learning_rate": 3.5925612848689777e-06, "loss": 0.4579, "step": 11065, "task_loss": 0.2691078782081604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5748831033706665, "epoch": 9.35, "learning_rate": 3.587865126326665e-06, "loss": 0.618, "step": 11066, "task_loss": 0.4566141963005066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5156427621841431, "epoch": 9.35, "learning_rate": 3.5831689677843525e-06, "loss": 0.4297, "step": 11067, "task_loss": 0.6920779943466187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6525932550430298, "epoch": 9.36, "learning_rate": 3.57847280924204e-06, "loss": 0.6003, "step": 11068, "task_loss": 1.0546236038208008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.460178941488266, "epoch": 9.36, "learning_rate": 3.5737766506997278e-06, "loss": 0.4694, "step": 11069, "task_loss": 0.6075356006622314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.22640904784202576, "epoch": 9.36, "learning_rate": 3.569080492157415e-06, "loss": 0.3696, "step": 11070, "task_loss": 0.2658573091030121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9459089040756226, "epoch": 9.36, "learning_rate": 3.5643843336151026e-06, "loss": 0.6141, "step": 11071, "task_loss": 1.0397311449050903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.760982871055603, "epoch": 9.36, "learning_rate": 3.559688175072791e-06, "loss": 0.4922, "step": 11072, "task_loss": 0.9550304412841797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7811770439147949, "epoch": 9.36, "learning_rate": 3.5549920165304786e-06, "loss": 0.6102, "step": 11073, "task_loss": 0.6926978826522827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7128467559814453, "epoch": 9.36, "learning_rate": 3.550295857988166e-06, "loss": 0.7306, "step": 11074, "task_loss": 0.5807129144668579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4492053985595703, "epoch": 9.36, "learning_rate": 3.5455996994458534e-06, "loss": 0.527, "step": 11075, "task_loss": 0.27513980865478516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5285925269126892, "epoch": 9.36, "learning_rate": 3.540903540903541e-06, "loss": 0.5883, "step": 11076, "task_loss": 0.9709903597831726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5511366724967957, "epoch": 9.36, "learning_rate": 3.5362073823612286e-06, "loss": 0.4817, "step": 11077, "task_loss": 0.41549450159072876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8711551427841187, "epoch": 9.36, "learning_rate": 3.531511223818916e-06, "loss": 0.5391, "step": 11078, "task_loss": 0.6693483591079712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28105229139328003, "epoch": 9.36, "learning_rate": 3.5268150652766043e-06, "loss": 0.4104, "step": 11079, "task_loss": 0.12162045389413834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6135033965110779, "epoch": 9.37, "learning_rate": 3.522118906734292e-06, "loss": 0.5595, "step": 11080, "task_loss": 1.2336888313293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39118945598602295, "epoch": 9.37, "learning_rate": 3.517422748191979e-06, "loss": 0.5301, "step": 11081, "task_loss": 0.6035183072090149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7358888387680054, "epoch": 9.37, "learning_rate": 3.5127265896496667e-06, "loss": 0.7053, "step": 11082, "task_loss": 1.8143932819366455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.22508837282657623, "epoch": 9.37, "learning_rate": 3.5080304311073543e-06, "loss": 0.5424, "step": 11083, "task_loss": 0.4394606351852417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.652307391166687, "epoch": 9.37, "learning_rate": 3.503334272565042e-06, "loss": 0.5909, "step": 11084, "task_loss": 1.007152795791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5368719100952148, "epoch": 9.37, "learning_rate": 3.4986381140227295e-06, "loss": 0.4648, "step": 11085, "task_loss": 0.9044103622436523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3461099863052368, "epoch": 9.37, "learning_rate": 3.4939419554804167e-06, "loss": 0.5445, "step": 11086, "task_loss": 0.4603075385093689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21174532175064087, "epoch": 9.37, "learning_rate": 3.489245796938105e-06, "loss": 0.474, "step": 11087, "task_loss": 0.6156772375106812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4060130715370178, "epoch": 9.37, "learning_rate": 3.4845496383957928e-06, "loss": 0.4798, "step": 11088, "task_loss": 0.8514322638511658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45813047885894775, "epoch": 9.37, "learning_rate": 3.47985347985348e-06, "loss": 0.6052, "step": 11089, "task_loss": 0.7904375791549683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4391627311706543, "epoch": 9.37, "learning_rate": 3.4751573213111676e-06, "loss": 0.5397, "step": 11090, "task_loss": 0.6872727870941162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4688383638858795, "epoch": 9.38, "learning_rate": 3.470461162768855e-06, "loss": 0.5555, "step": 11091, "task_loss": 1.3488585948944092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4727599024772644, "epoch": 9.38, "learning_rate": 3.465765004226543e-06, "loss": 0.7066, "step": 11092, "task_loss": 0.38656407594680786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4719846844673157, "epoch": 9.38, "learning_rate": 3.4610688456842304e-06, "loss": 0.4996, "step": 11093, "task_loss": 0.34239593148231506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5377851724624634, "epoch": 9.38, "learning_rate": 3.4563726871419176e-06, "loss": 0.4278, "step": 11094, "task_loss": 0.2668091952800751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5338855385780334, "epoch": 9.38, "learning_rate": 3.451676528599606e-06, "loss": 0.5536, "step": 11095, "task_loss": 1.4616986513137817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27259641885757446, "epoch": 9.38, "learning_rate": 3.4469803700572937e-06, "loss": 0.3619, "step": 11096, "task_loss": 0.40984538197517395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0417485237121582, "epoch": 9.38, "learning_rate": 3.442284211514981e-06, "loss": 0.5619, "step": 11097, "task_loss": 0.6893367171287537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48681414127349854, "epoch": 9.38, "learning_rate": 3.4375880529726685e-06, "loss": 0.5705, "step": 11098, "task_loss": 0.4165767729282379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34221890568733215, "epoch": 9.38, "learning_rate": 3.432891894430356e-06, "loss": 0.5149, "step": 11099, "task_loss": 0.6258363127708435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21900759637355804, "epoch": 9.38, "learning_rate": 3.4281957358880437e-06, "loss": 0.4556, "step": 11100, "task_loss": 0.0390491709113121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40145713090896606, "epoch": 9.38, "learning_rate": 3.4234995773457313e-06, "loss": 0.479, "step": 11101, "task_loss": 1.0186030864715576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5773762464523315, "epoch": 9.38, "learning_rate": 3.4188034188034193e-06, "loss": 0.5442, "step": 11102, "task_loss": 0.5450099110603333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6518995761871338, "epoch": 9.39, "learning_rate": 3.414107260261107e-06, "loss": 0.6151, "step": 11103, "task_loss": 1.013680338859558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.2134112119674683, "epoch": 9.39, "learning_rate": 3.4094111017187945e-06, "loss": 0.9186, "step": 11104, "task_loss": 1.3654720783233643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3784998953342438, "epoch": 9.39, "learning_rate": 3.4047149431764817e-06, "loss": 0.5515, "step": 11105, "task_loss": 0.5940244197845459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5793322324752808, "epoch": 9.39, "learning_rate": 3.4000187846341693e-06, "loss": 0.5124, "step": 11106, "task_loss": 1.4522104263305664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5943914651870728, "epoch": 9.39, "learning_rate": 3.395322626091857e-06, "loss": 0.5091, "step": 11107, "task_loss": 1.0586915016174316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40798231959342957, "epoch": 9.39, "learning_rate": 3.3906264675495446e-06, "loss": 0.5334, "step": 11108, "task_loss": 0.6945458650588989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33675509691238403, "epoch": 9.39, "learning_rate": 3.3859303090072318e-06, "loss": 0.4651, "step": 11109, "task_loss": 0.5027926564216614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6282447576522827, "epoch": 9.39, "learning_rate": 3.38123415046492e-06, "loss": 0.625, "step": 11110, "task_loss": 0.5723469853401184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4841041564941406, "epoch": 9.39, "learning_rate": 3.376537991922608e-06, "loss": 0.4857, "step": 11111, "task_loss": 1.1540573835372925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9749042987823486, "epoch": 9.39, "learning_rate": 3.3718418333802954e-06, "loss": 0.6149, "step": 11112, "task_loss": 1.044559359550476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.363504022359848, "epoch": 9.39, "learning_rate": 3.3671456748379826e-06, "loss": 0.4175, "step": 11113, "task_loss": 0.04462846368551254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46116381883621216, "epoch": 9.39, "learning_rate": 3.3624495162956702e-06, "loss": 0.5181, "step": 11114, "task_loss": 1.4088952541351318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5089102387428284, "epoch": 9.4, "learning_rate": 3.357753357753358e-06, "loss": 0.4528, "step": 11115, "task_loss": 0.3519296944141388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3063187003135681, "epoch": 9.4, "learning_rate": 3.3530571992110454e-06, "loss": 0.5181, "step": 11116, "task_loss": 0.5191404223442078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40484917163848877, "epoch": 9.4, "learning_rate": 3.3483610406687326e-06, "loss": 0.5148, "step": 11117, "task_loss": 0.5656777620315552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42128708958625793, "epoch": 9.4, "learning_rate": 3.343664882126421e-06, "loss": 0.4419, "step": 11118, "task_loss": 0.9541724920272827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7682518362998962, "epoch": 9.4, "learning_rate": 3.3389687235841087e-06, "loss": 0.6116, "step": 11119, "task_loss": 0.4327446222305298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2526981830596924, "epoch": 9.4, "learning_rate": 3.334272565041796e-06, "loss": 0.4906, "step": 11120, "task_loss": 0.4575459361076355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.578287661075592, "epoch": 9.4, "learning_rate": 3.3295764064994835e-06, "loss": 0.461, "step": 11121, "task_loss": 0.4194147288799286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3759574294090271, "epoch": 9.4, "learning_rate": 3.324880247957171e-06, "loss": 0.5168, "step": 11122, "task_loss": 1.132348656654358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4385985732078552, "epoch": 9.4, "learning_rate": 3.3201840894148587e-06, "loss": 0.4941, "step": 11123, "task_loss": 0.4185658097267151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5785167217254639, "epoch": 9.4, "learning_rate": 3.3154879308725463e-06, "loss": 0.5546, "step": 11124, "task_loss": 0.43603217601776123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41371840238571167, "epoch": 9.4, "learning_rate": 3.3107917723302335e-06, "loss": 0.5607, "step": 11125, "task_loss": 0.5315071940422058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6183754205703735, "epoch": 9.4, "learning_rate": 3.306095613787922e-06, "loss": 0.6959, "step": 11126, "task_loss": 1.381882667541504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6612210273742676, "epoch": 9.41, "learning_rate": 3.3013994552456096e-06, "loss": 0.4466, "step": 11127, "task_loss": 0.5642865300178528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5720486640930176, "epoch": 9.41, "learning_rate": 3.2967032967032968e-06, "loss": 0.5481, "step": 11128, "task_loss": 0.7435349225997925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35313236713409424, "epoch": 9.41, "learning_rate": 3.2920071381609844e-06, "loss": 0.5477, "step": 11129, "task_loss": 0.0671318992972374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6041625738143921, "epoch": 9.41, "learning_rate": 3.287310979618672e-06, "loss": 0.5653, "step": 11130, "task_loss": 0.5314701199531555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5017932653427124, "epoch": 9.41, "learning_rate": 3.2826148210763596e-06, "loss": 0.6201, "step": 11131, "task_loss": 1.622761607170105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48829925060272217, "epoch": 9.41, "learning_rate": 3.277918662534047e-06, "loss": 0.5727, "step": 11132, "task_loss": 0.9491240978240967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33855167031288147, "epoch": 9.41, "learning_rate": 3.2732225039917352e-06, "loss": 0.3967, "step": 11133, "task_loss": 0.47685757279396057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44393616914749146, "epoch": 9.41, "learning_rate": 3.268526345449423e-06, "loss": 0.4527, "step": 11134, "task_loss": 0.5089771151542664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4035458266735077, "epoch": 9.41, "learning_rate": 3.2638301869071105e-06, "loss": 0.485, "step": 11135, "task_loss": 0.8136974573135376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26528847217559814, "epoch": 9.41, "learning_rate": 3.2591340283647977e-06, "loss": 0.5091, "step": 11136, "task_loss": 1.0755681991577148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.421213299036026, "epoch": 9.41, "learning_rate": 3.2544378698224853e-06, "loss": 0.5128, "step": 11137, "task_loss": 0.47568678855895996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4266492426395416, "epoch": 9.41, "learning_rate": 3.249741711280173e-06, "loss": 0.3686, "step": 11138, "task_loss": 0.6740009784698486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5737830996513367, "epoch": 9.42, "learning_rate": 3.2450455527378605e-06, "loss": 0.5747, "step": 11139, "task_loss": 0.4693962633609772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5508561134338379, "epoch": 9.42, "learning_rate": 3.240349394195548e-06, "loss": 0.5583, "step": 11140, "task_loss": 1.0493996143341064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3767312169075012, "epoch": 9.42, "learning_rate": 3.235653235653236e-06, "loss": 0.5382, "step": 11141, "task_loss": 0.6467189788818359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48803985118865967, "epoch": 9.42, "learning_rate": 3.2309570771109237e-06, "loss": 0.6365, "step": 11142, "task_loss": 0.4649916887283325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3532959222793579, "epoch": 9.42, "learning_rate": 3.2262609185686113e-06, "loss": 0.5312, "step": 11143, "task_loss": 0.17115454375743866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28788018226623535, "epoch": 9.42, "learning_rate": 3.2215647600262985e-06, "loss": 0.4798, "step": 11144, "task_loss": 0.5120036005973816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5799859762191772, "epoch": 9.42, "learning_rate": 3.216868601483986e-06, "loss": 0.4881, "step": 11145, "task_loss": 0.6663724780082703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6412619352340698, "epoch": 9.42, "learning_rate": 3.2121724429416738e-06, "loss": 0.7726, "step": 11146, "task_loss": 0.8502026200294495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3957134485244751, "epoch": 9.42, "learning_rate": 3.2074762843993614e-06, "loss": 0.4964, "step": 11147, "task_loss": 0.45385581254959106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5722328424453735, "epoch": 9.42, "learning_rate": 3.2027801258570485e-06, "loss": 0.5131, "step": 11148, "task_loss": 0.48823657631874084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4369701147079468, "epoch": 9.42, "learning_rate": 3.198083967314737e-06, "loss": 0.4662, "step": 11149, "task_loss": 0.7067809104919434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5280759334564209, "epoch": 9.42, "learning_rate": 3.1933878087724246e-06, "loss": 0.5075, "step": 11150, "task_loss": 1.2644503116607666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3056628108024597, "epoch": 9.43, "learning_rate": 3.1886916502301122e-06, "loss": 0.4695, "step": 11151, "task_loss": 0.9946158528327942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5277331471443176, "epoch": 9.43, "learning_rate": 3.1839954916877994e-06, "loss": 0.6502, "step": 11152, "task_loss": 0.6809747219085693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5977619886398315, "epoch": 9.43, "learning_rate": 3.179299333145487e-06, "loss": 0.5436, "step": 11153, "task_loss": 0.40614473819732666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5245158672332764, "epoch": 9.43, "learning_rate": 3.1746031746031746e-06, "loss": 0.6006, "step": 11154, "task_loss": 0.6522426605224609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2320854365825653, "epoch": 9.43, "learning_rate": 3.1699070160608622e-06, "loss": 0.3882, "step": 11155, "task_loss": 0.3635943830013275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3936408758163452, "epoch": 9.43, "learning_rate": 3.1652108575185503e-06, "loss": 0.4528, "step": 11156, "task_loss": 0.23086361587047577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6488014459609985, "epoch": 9.43, "learning_rate": 3.160514698976238e-06, "loss": 0.5795, "step": 11157, "task_loss": 1.0951848030090332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7584950923919678, "epoch": 9.43, "learning_rate": 3.1558185404339255e-06, "loss": 0.5088, "step": 11158, "task_loss": 0.7967728972434998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4011043310165405, "epoch": 9.43, "learning_rate": 3.1511223818916127e-06, "loss": 0.6411, "step": 11159, "task_loss": 0.6001083850860596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.261152446269989, "epoch": 9.43, "learning_rate": 3.1464262233493003e-06, "loss": 0.4621, "step": 11160, "task_loss": 0.22438998520374298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41119974851608276, "epoch": 9.43, "learning_rate": 3.141730064806988e-06, "loss": 0.4448, "step": 11161, "task_loss": 0.2686886489391327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49498239159584045, "epoch": 9.44, "learning_rate": 3.1370339062646755e-06, "loss": 0.5443, "step": 11162, "task_loss": 0.661995530128479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7829129099845886, "epoch": 9.44, "learning_rate": 3.132337747722363e-06, "loss": 0.508, "step": 11163, "task_loss": 0.9403130412101746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24090033769607544, "epoch": 9.44, "learning_rate": 3.127641589180051e-06, "loss": 0.4948, "step": 11164, "task_loss": 0.8147763013839722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4559568166732788, "epoch": 9.44, "learning_rate": 3.1229454306377383e-06, "loss": 0.4511, "step": 11165, "task_loss": 1.1299328804016113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29474303126335144, "epoch": 9.44, "learning_rate": 3.1182492720954264e-06, "loss": 0.4699, "step": 11166, "task_loss": 1.2656724452972412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.386243999004364, "epoch": 9.44, "learning_rate": 3.1135531135531136e-06, "loss": 0.3807, "step": 11167, "task_loss": 0.21686150133609772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7215062379837036, "epoch": 9.44, "learning_rate": 3.108856955010801e-06, "loss": 0.5919, "step": 11168, "task_loss": 0.690640926361084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46041086316108704, "epoch": 9.44, "learning_rate": 3.1041607964684888e-06, "loss": 0.5118, "step": 11169, "task_loss": 1.5581337213516235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5054048299789429, "epoch": 9.44, "learning_rate": 3.099464637926177e-06, "loss": 0.6134, "step": 11170, "task_loss": 0.5234246253967285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48746350407600403, "epoch": 9.44, "learning_rate": 3.094768479383864e-06, "loss": 0.5169, "step": 11171, "task_loss": 1.117415428161621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3303918242454529, "epoch": 9.44, "learning_rate": 3.0900723208415516e-06, "loss": 0.4519, "step": 11172, "task_loss": 0.6592282056808472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7479546666145325, "epoch": 9.44, "learning_rate": 3.0853761622992392e-06, "loss": 0.4958, "step": 11173, "task_loss": 1.226424217224121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3579331040382385, "epoch": 9.45, "learning_rate": 3.0806800037569273e-06, "loss": 0.5864, "step": 11174, "task_loss": 0.8030137419700623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5451611876487732, "epoch": 9.45, "learning_rate": 3.0759838452146144e-06, "loss": 0.4261, "step": 11175, "task_loss": 0.9791380167007446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4265974164009094, "epoch": 9.45, "learning_rate": 3.071287686672302e-06, "loss": 0.6039, "step": 11176, "task_loss": 0.82646244764328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35711416602134705, "epoch": 9.45, "learning_rate": 3.0665915281299897e-06, "loss": 0.3812, "step": 11177, "task_loss": 0.18740399181842804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21724477410316467, "epoch": 9.45, "learning_rate": 3.0618953695876777e-06, "loss": 0.4176, "step": 11178, "task_loss": 0.5868754982948303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7397257089614868, "epoch": 9.45, "learning_rate": 3.057199211045365e-06, "loss": 0.7367, "step": 11179, "task_loss": 0.7123569250106812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43678778409957886, "epoch": 9.45, "learning_rate": 3.0525030525030525e-06, "loss": 0.6414, "step": 11180, "task_loss": 0.0496927909553051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2961001992225647, "epoch": 9.45, "learning_rate": 3.0478068939607405e-06, "loss": 0.4414, "step": 11181, "task_loss": 0.9072253108024597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4923210144042969, "epoch": 9.45, "learning_rate": 3.043110735418428e-06, "loss": 0.6522, "step": 11182, "task_loss": 1.3834266662597656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46130937337875366, "epoch": 9.45, "learning_rate": 3.0384145768761153e-06, "loss": 0.5212, "step": 11183, "task_loss": 0.4071640074253082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47120994329452515, "epoch": 9.45, "learning_rate": 3.033718418333803e-06, "loss": 0.5115, "step": 11184, "task_loss": 0.5823133587837219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5720561742782593, "epoch": 9.45, "learning_rate": 3.029022259791491e-06, "loss": 0.6199, "step": 11185, "task_loss": 0.9620575904846191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20727594196796417, "epoch": 9.46, "learning_rate": 3.0243261012491786e-06, "loss": 0.3713, "step": 11186, "task_loss": 0.17668583989143372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6871854066848755, "epoch": 9.46, "learning_rate": 3.0196299427068658e-06, "loss": 0.4828, "step": 11187, "task_loss": 0.70911705493927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42164304852485657, "epoch": 9.46, "learning_rate": 3.0149337841645534e-06, "loss": 0.4988, "step": 11188, "task_loss": 0.7016412019729614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4511631727218628, "epoch": 9.46, "learning_rate": 3.0102376256222414e-06, "loss": 0.6191, "step": 11189, "task_loss": 1.4086159467697144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4629976153373718, "epoch": 9.46, "learning_rate": 3.0055414670799286e-06, "loss": 0.4251, "step": 11190, "task_loss": 1.0189316272735596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3483739197254181, "epoch": 9.46, "learning_rate": 3.0008453085376162e-06, "loss": 0.5225, "step": 11191, "task_loss": 0.3746688663959503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37353307008743286, "epoch": 9.46, "learning_rate": 2.996149149995304e-06, "loss": 0.4624, "step": 11192, "task_loss": 0.7462712526321411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6474190950393677, "epoch": 9.46, "learning_rate": 2.991452991452992e-06, "loss": 0.5293, "step": 11193, "task_loss": 0.8660323619842529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32418981194496155, "epoch": 9.46, "learning_rate": 2.986756832910679e-06, "loss": 0.4336, "step": 11194, "task_loss": 1.6433663368225098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47264644503593445, "epoch": 9.46, "learning_rate": 2.9820606743683667e-06, "loss": 0.5506, "step": 11195, "task_loss": 1.3272998332977295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5458943843841553, "epoch": 9.46, "learning_rate": 2.9773645158260543e-06, "loss": 0.4974, "step": 11196, "task_loss": 0.5552412271499634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3996809422969818, "epoch": 9.46, "learning_rate": 2.9726683572837423e-06, "loss": 0.469, "step": 11197, "task_loss": 0.9691037535667419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34169983863830566, "epoch": 9.47, "learning_rate": 2.9679721987414295e-06, "loss": 0.5313, "step": 11198, "task_loss": 0.5721960663795471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6544723510742188, "epoch": 9.47, "learning_rate": 2.963276040199117e-06, "loss": 0.5418, "step": 11199, "task_loss": 0.9403088688850403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3988778591156006, "epoch": 9.47, "learning_rate": 2.9585798816568047e-06, "loss": 0.4839, "step": 11200, "task_loss": 0.4801325500011444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0166423320770264, "epoch": 9.47, "learning_rate": 2.9538837231144927e-06, "loss": 0.5916, "step": 11201, "task_loss": 0.7992338538169861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42222321033477783, "epoch": 9.47, "learning_rate": 2.94918756457218e-06, "loss": 0.4958, "step": 11202, "task_loss": 0.036304257810115814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4522770643234253, "epoch": 9.47, "learning_rate": 2.9444914060298675e-06, "loss": 0.6033, "step": 11203, "task_loss": 1.3430167436599731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.882460355758667, "epoch": 9.47, "learning_rate": 2.939795247487555e-06, "loss": 0.5889, "step": 11204, "task_loss": 0.7951751351356506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.16033318638801575, "epoch": 9.47, "learning_rate": 2.935099088945243e-06, "loss": 0.429, "step": 11205, "task_loss": 0.042873214930295944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3059881925582886, "epoch": 9.47, "learning_rate": 2.9304029304029304e-06, "loss": 0.4367, "step": 11206, "task_loss": 0.13734614849090576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46094363927841187, "epoch": 9.47, "learning_rate": 2.925706771860618e-06, "loss": 0.4416, "step": 11207, "task_loss": 0.2818050682544708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7939265966415405, "epoch": 9.47, "learning_rate": 2.921010613318306e-06, "loss": 0.5391, "step": 11208, "task_loss": 1.272094488143921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4700500965118408, "epoch": 9.47, "learning_rate": 2.9163144547759936e-06, "loss": 0.6088, "step": 11209, "task_loss": 1.058397889137268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8714977502822876, "epoch": 9.48, "learning_rate": 2.911618296233681e-06, "loss": 0.6236, "step": 11210, "task_loss": 1.0277800559997559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38214918971061707, "epoch": 9.48, "learning_rate": 2.9069221376913684e-06, "loss": 0.5704, "step": 11211, "task_loss": 0.3103393316268921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5490093231201172, "epoch": 9.48, "learning_rate": 2.9022259791490565e-06, "loss": 0.5632, "step": 11212, "task_loss": 0.42381948232650757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5010666847229004, "epoch": 9.48, "learning_rate": 2.897529820606744e-06, "loss": 0.5205, "step": 11213, "task_loss": 0.5145685076713562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.274164080619812, "epoch": 9.48, "learning_rate": 2.8928336620644312e-06, "loss": 0.4008, "step": 11214, "task_loss": 0.5901873707771301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3016600012779236, "epoch": 9.48, "learning_rate": 2.888137503522119e-06, "loss": 0.4355, "step": 11215, "task_loss": 0.2540227770805359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5681004524230957, "epoch": 9.48, "learning_rate": 2.883441344979807e-06, "loss": 0.5127, "step": 11216, "task_loss": 1.0331302881240845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.268777072429657, "epoch": 9.48, "learning_rate": 2.8787451864374945e-06, "loss": 0.5467, "step": 11217, "task_loss": 0.4703216552734375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4104372560977936, "epoch": 9.48, "learning_rate": 2.8740490278951817e-06, "loss": 0.4633, "step": 11218, "task_loss": 0.1668110191822052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4366608262062073, "epoch": 9.48, "learning_rate": 2.8693528693528693e-06, "loss": 0.4705, "step": 11219, "task_loss": 0.9091637134552002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4559769630432129, "epoch": 9.48, "learning_rate": 2.8646567108105573e-06, "loss": 0.5085, "step": 11220, "task_loss": 0.29177388548851013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6140218377113342, "epoch": 9.48, "learning_rate": 2.859960552268245e-06, "loss": 0.5857, "step": 11221, "task_loss": 1.3214465379714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40185555815696716, "epoch": 9.49, "learning_rate": 2.855264393725932e-06, "loss": 0.4727, "step": 11222, "task_loss": 0.6150964498519897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.647117018699646, "epoch": 9.49, "learning_rate": 2.8505682351836197e-06, "loss": 0.5699, "step": 11223, "task_loss": 0.9286736249923706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6205156445503235, "epoch": 9.49, "learning_rate": 2.8458720766413078e-06, "loss": 0.6229, "step": 11224, "task_loss": 1.1973992586135864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49759116768836975, "epoch": 9.49, "learning_rate": 2.8411759180989954e-06, "loss": 0.7009, "step": 11225, "task_loss": 0.9334624409675598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6623815298080444, "epoch": 9.49, "learning_rate": 2.8364797595566826e-06, "loss": 0.5065, "step": 11226, "task_loss": 0.707656979560852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7607128620147705, "epoch": 9.49, "learning_rate": 2.83178360101437e-06, "loss": 0.6007, "step": 11227, "task_loss": 1.4120184183120728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5339524745941162, "epoch": 9.49, "learning_rate": 2.8270874424720582e-06, "loss": 0.6157, "step": 11228, "task_loss": 0.13726010918617249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6084080338478088, "epoch": 9.49, "learning_rate": 2.8223912839297454e-06, "loss": 0.6096, "step": 11229, "task_loss": 0.3943917751312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4695062041282654, "epoch": 9.49, "learning_rate": 2.817695125387433e-06, "loss": 0.501, "step": 11230, "task_loss": 0.5516911149024963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4157288670539856, "epoch": 9.49, "learning_rate": 2.8129989668451206e-06, "loss": 0.4966, "step": 11231, "task_loss": 0.3727318048477173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7830945253372192, "epoch": 9.49, "learning_rate": 2.8083028083028087e-06, "loss": 0.4684, "step": 11232, "task_loss": 1.0032055377960205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5093777179718018, "epoch": 9.5, "learning_rate": 2.803606649760496e-06, "loss": 0.4463, "step": 11233, "task_loss": 0.6610000729560852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3209995627403259, "epoch": 9.5, "learning_rate": 2.7989104912181835e-06, "loss": 0.4928, "step": 11234, "task_loss": 0.38961416482925415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4636665880680084, "epoch": 9.5, "learning_rate": 2.7942143326758715e-06, "loss": 0.3332, "step": 11235, "task_loss": 0.6705412268638611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.540105938911438, "epoch": 9.5, "learning_rate": 2.789518174133559e-06, "loss": 0.5579, "step": 11236, "task_loss": 0.5144619941711426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5632665157318115, "epoch": 9.5, "learning_rate": 2.7848220155912463e-06, "loss": 0.4632, "step": 11237, "task_loss": 0.29170650243759155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5104866027832031, "epoch": 9.5, "learning_rate": 2.780125857048934e-06, "loss": 0.415, "step": 11238, "task_loss": 1.0635499954223633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9305683970451355, "epoch": 9.5, "learning_rate": 2.775429698506622e-06, "loss": 0.555, "step": 11239, "task_loss": 2.3295421600341797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6848259568214417, "epoch": 9.5, "learning_rate": 2.7707335399643095e-06, "loss": 0.7191, "step": 11240, "task_loss": 0.5240308046340942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41282981634140015, "epoch": 9.5, "learning_rate": 2.7660373814219967e-06, "loss": 0.4708, "step": 11241, "task_loss": 0.8723867535591125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36572059988975525, "epoch": 9.5, "learning_rate": 2.7613412228796843e-06, "loss": 0.5519, "step": 11242, "task_loss": 1.1419029235839844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45477885007858276, "epoch": 9.5, "learning_rate": 2.7566450643373724e-06, "loss": 0.5276, "step": 11243, "task_loss": 1.0466963052749634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6167359352111816, "epoch": 9.5, "learning_rate": 2.75194890579506e-06, "loss": 0.5451, "step": 11244, "task_loss": 0.42671918869018555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6690115928649902, "epoch": 9.51, "learning_rate": 2.747252747252747e-06, "loss": 0.5508, "step": 11245, "task_loss": 0.9016473293304443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34628939628601074, "epoch": 9.51, "learning_rate": 2.7425565887104348e-06, "loss": 0.4635, "step": 11246, "task_loss": 0.2055140733718872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.494573175907135, "epoch": 9.51, "learning_rate": 2.737860430168123e-06, "loss": 0.635, "step": 11247, "task_loss": 0.8175778388977051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7489429712295532, "epoch": 9.51, "learning_rate": 2.7331642716258104e-06, "loss": 0.5704, "step": 11248, "task_loss": 0.6133424043655396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8406216502189636, "epoch": 9.51, "learning_rate": 2.7284681130834976e-06, "loss": 0.5725, "step": 11249, "task_loss": 0.5567544102668762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6654524803161621, "epoch": 9.51, "learning_rate": 2.7237719545411852e-06, "loss": 0.4761, "step": 11250, "task_loss": 1.023114562034607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44162270426750183, "epoch": 9.51, "learning_rate": 2.7190757959988733e-06, "loss": 0.6053, "step": 11251, "task_loss": 0.4087909758090973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7131583094596863, "epoch": 9.51, "learning_rate": 2.714379637456561e-06, "loss": 0.5148, "step": 11252, "task_loss": 0.7246546149253845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43228936195373535, "epoch": 9.51, "learning_rate": 2.709683478914248e-06, "loss": 0.4739, "step": 11253, "task_loss": 0.7765113711357117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1676274538040161, "epoch": 9.51, "learning_rate": 2.7049873203719357e-06, "loss": 0.7663, "step": 11254, "task_loss": 1.8677423000335693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6133624911308289, "epoch": 9.51, "learning_rate": 2.7002911618296237e-06, "loss": 0.5008, "step": 11255, "task_loss": 0.2914692163467407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2611619830131531, "epoch": 9.51, "learning_rate": 2.6955950032873113e-06, "loss": 0.4471, "step": 11256, "task_loss": 0.9017422795295715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6758955121040344, "epoch": 9.52, "learning_rate": 2.6908988447449985e-06, "loss": 0.5281, "step": 11257, "task_loss": 0.5694376230239868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3278709352016449, "epoch": 9.52, "learning_rate": 2.6862026862026865e-06, "loss": 0.5491, "step": 11258, "task_loss": 0.33973678946495056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33865565061569214, "epoch": 9.52, "learning_rate": 2.681506527660374e-06, "loss": 0.3869, "step": 11259, "task_loss": 0.3726865351200104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34727993607521057, "epoch": 9.52, "learning_rate": 2.6768103691180617e-06, "loss": 0.5343, "step": 11260, "task_loss": 0.368431955575943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41084638237953186, "epoch": 9.52, "learning_rate": 2.672114210575749e-06, "loss": 0.5498, "step": 11261, "task_loss": 0.579615592956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48449406027793884, "epoch": 9.52, "learning_rate": 2.667418052033437e-06, "loss": 0.5411, "step": 11262, "task_loss": 0.4631625711917877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.034287691116333, "epoch": 9.52, "learning_rate": 2.6627218934911246e-06, "loss": 0.5591, "step": 11263, "task_loss": 0.46066465973854065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39564943313598633, "epoch": 9.52, "learning_rate": 2.658025734948812e-06, "loss": 0.4808, "step": 11264, "task_loss": 0.7466022372245789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4284217059612274, "epoch": 9.52, "learning_rate": 2.6533295764064994e-06, "loss": 0.4987, "step": 11265, "task_loss": 0.5205172896385193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6341894268989563, "epoch": 9.52, "learning_rate": 2.6486334178641874e-06, "loss": 0.5597, "step": 11266, "task_loss": 0.6443308591842651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8292759656906128, "epoch": 9.52, "learning_rate": 2.643937259321875e-06, "loss": 0.5684, "step": 11267, "task_loss": 1.3004393577575684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5298853516578674, "epoch": 9.52, "learning_rate": 2.639241100779562e-06, "loss": 0.5623, "step": 11268, "task_loss": 0.4081950783729553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6687687635421753, "epoch": 9.53, "learning_rate": 2.63454494223725e-06, "loss": 0.475, "step": 11269, "task_loss": 0.8442573547363281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2903430461883545, "epoch": 9.53, "learning_rate": 2.629848783694938e-06, "loss": 0.5469, "step": 11270, "task_loss": 0.4228554368019104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37894803285598755, "epoch": 9.53, "learning_rate": 2.6251526251526255e-06, "loss": 0.6194, "step": 11271, "task_loss": 0.9606747031211853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34147006273269653, "epoch": 9.53, "learning_rate": 2.6204564666103126e-06, "loss": 0.438, "step": 11272, "task_loss": 0.17741194367408752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4077605903148651, "epoch": 9.53, "learning_rate": 2.6157603080680003e-06, "loss": 0.518, "step": 11273, "task_loss": 0.7894208431243896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6549860239028931, "epoch": 9.53, "learning_rate": 2.6110641495256883e-06, "loss": 0.4827, "step": 11274, "task_loss": 0.6128246784210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4879722595214844, "epoch": 9.53, "learning_rate": 2.606367990983376e-06, "loss": 0.5812, "step": 11275, "task_loss": 0.4571336805820465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3282809853553772, "epoch": 9.53, "learning_rate": 2.601671832441063e-06, "loss": 0.4019, "step": 11276, "task_loss": 0.13544930517673492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3447266221046448, "epoch": 9.53, "learning_rate": 2.5969756738987507e-06, "loss": 0.4597, "step": 11277, "task_loss": 0.08066228032112122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6418187022209167, "epoch": 9.53, "learning_rate": 2.5922795153564387e-06, "loss": 0.4615, "step": 11278, "task_loss": 0.8107903003692627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21287629008293152, "epoch": 9.53, "learning_rate": 2.5875833568141263e-06, "loss": 0.361, "step": 11279, "task_loss": 1.2635207176208496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4982442855834961, "epoch": 9.53, "learning_rate": 2.5828871982718135e-06, "loss": 0.4159, "step": 11280, "task_loss": 0.4000909924507141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7183154225349426, "epoch": 9.54, "learning_rate": 2.578191039729501e-06, "loss": 0.4555, "step": 11281, "task_loss": 2.55143404006958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8193515539169312, "epoch": 9.54, "learning_rate": 2.573494881187189e-06, "loss": 0.7928, "step": 11282, "task_loss": 0.7306914925575256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6971725225448608, "epoch": 9.54, "learning_rate": 2.5687987226448768e-06, "loss": 0.6143, "step": 11283, "task_loss": 1.3449386358261108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5623189210891724, "epoch": 9.54, "learning_rate": 2.564102564102564e-06, "loss": 0.4487, "step": 11284, "task_loss": 0.3273874819278717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.376304566860199, "epoch": 9.54, "learning_rate": 2.559406405560252e-06, "loss": 0.44, "step": 11285, "task_loss": 1.212302803993225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.582756519317627, "epoch": 9.54, "learning_rate": 2.5547102470179396e-06, "loss": 0.5239, "step": 11286, "task_loss": 0.7991886734962463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24929919838905334, "epoch": 9.54, "learning_rate": 2.5500140884756272e-06, "loss": 0.5394, "step": 11287, "task_loss": 0.21246790885925293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8255369663238525, "epoch": 9.54, "learning_rate": 2.5453179299333144e-06, "loss": 0.6237, "step": 11288, "task_loss": 1.769574761390686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3857818841934204, "epoch": 9.54, "learning_rate": 2.5406217713910024e-06, "loss": 0.501, "step": 11289, "task_loss": 0.537086546421051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7263843417167664, "epoch": 9.54, "learning_rate": 2.53592561284869e-06, "loss": 0.6269, "step": 11290, "task_loss": 1.3120977878570557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34242308139801025, "epoch": 9.54, "learning_rate": 2.5312294543063777e-06, "loss": 0.5248, "step": 11291, "task_loss": 0.5258371233940125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4857950806617737, "epoch": 9.54, "learning_rate": 2.526533295764065e-06, "loss": 0.515, "step": 11292, "task_loss": 1.5790259838104248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7454334497451782, "epoch": 9.55, "learning_rate": 2.521837137221753e-06, "loss": 0.5885, "step": 11293, "task_loss": 0.6996546387672424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5637484788894653, "epoch": 9.55, "learning_rate": 2.5171409786794405e-06, "loss": 0.5348, "step": 11294, "task_loss": 0.4666770100593567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5272043943405151, "epoch": 9.55, "learning_rate": 2.512444820137128e-06, "loss": 0.512, "step": 11295, "task_loss": 0.0990787148475647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8896141052246094, "epoch": 9.55, "learning_rate": 2.5077486615948153e-06, "loss": 0.6308, "step": 11296, "task_loss": 0.8580878973007202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20602408051490784, "epoch": 9.55, "learning_rate": 2.5030525030525033e-06, "loss": 0.4619, "step": 11297, "task_loss": 0.739088773727417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6215112209320068, "epoch": 9.55, "learning_rate": 2.498356344510191e-06, "loss": 0.5752, "step": 11298, "task_loss": 0.8936008214950562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42583224177360535, "epoch": 9.55, "learning_rate": 2.4936601859678785e-06, "loss": 0.4154, "step": 11299, "task_loss": 1.0302528142929077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.52662193775177, "epoch": 9.55, "learning_rate": 2.4889640274255657e-06, "loss": 0.5451, "step": 11300, "task_loss": 0.5620646476745605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5354537963867188, "epoch": 9.55, "learning_rate": 2.4842678688832538e-06, "loss": 0.6953, "step": 11301, "task_loss": 1.377805233001709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37558692693710327, "epoch": 9.55, "learning_rate": 2.4795717103409414e-06, "loss": 0.5588, "step": 11302, "task_loss": 0.5125276446342468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7456398010253906, "epoch": 9.55, "learning_rate": 2.474875551798629e-06, "loss": 0.4853, "step": 11303, "task_loss": 0.42750084400177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6304659843444824, "epoch": 9.56, "learning_rate": 2.470179393256316e-06, "loss": 0.5901, "step": 11304, "task_loss": 0.5211800336837769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5290344953536987, "epoch": 9.56, "learning_rate": 2.465483234714004e-06, "loss": 0.4549, "step": 11305, "task_loss": 0.6805067658424377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3372125029563904, "epoch": 9.56, "learning_rate": 2.460787076171692e-06, "loss": 0.4878, "step": 11306, "task_loss": 0.7358293533325195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4547599256038666, "epoch": 9.56, "learning_rate": 2.456090917629379e-06, "loss": 0.4554, "step": 11307, "task_loss": 0.43480032682418823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3850014805793762, "epoch": 9.56, "learning_rate": 2.4513947590870666e-06, "loss": 0.5356, "step": 11308, "task_loss": 0.6023352146148682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7165654301643372, "epoch": 9.56, "learning_rate": 2.4466986005447546e-06, "loss": 0.5904, "step": 11309, "task_loss": 0.6416550874710083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3178946375846863, "epoch": 9.56, "learning_rate": 2.4420024420024423e-06, "loss": 0.4502, "step": 11310, "task_loss": 0.34107497334480286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3706023693084717, "epoch": 9.56, "learning_rate": 2.4373062834601294e-06, "loss": 0.568, "step": 11311, "task_loss": 0.6563177704811096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3167952597141266, "epoch": 9.56, "learning_rate": 2.4326101249178175e-06, "loss": 0.5373, "step": 11312, "task_loss": 0.9530991911888123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5107266306877136, "epoch": 9.56, "learning_rate": 2.427913966375505e-06, "loss": 0.524, "step": 11313, "task_loss": 0.5436562299728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47211408615112305, "epoch": 9.56, "learning_rate": 2.4232178078331927e-06, "loss": 0.5131, "step": 11314, "task_loss": 0.9465396404266357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4294113218784332, "epoch": 9.56, "learning_rate": 2.41852164929088e-06, "loss": 0.5294, "step": 11315, "task_loss": 0.6846963167190552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4992968440055847, "epoch": 9.57, "learning_rate": 2.413825490748568e-06, "loss": 0.5218, "step": 11316, "task_loss": 0.6493985056877136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6440683603286743, "epoch": 9.57, "learning_rate": 2.4091293322062555e-06, "loss": 0.4644, "step": 11317, "task_loss": 0.5794883966445923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29907143115997314, "epoch": 9.57, "learning_rate": 2.404433173663943e-06, "loss": 0.3702, "step": 11318, "task_loss": 0.732492208480835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7898114919662476, "epoch": 9.57, "learning_rate": 2.3997370151216303e-06, "loss": 0.5684, "step": 11319, "task_loss": 0.40680450201034546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29804039001464844, "epoch": 9.57, "learning_rate": 2.3950408565793184e-06, "loss": 0.4027, "step": 11320, "task_loss": 0.6481256484985352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.268817663192749, "epoch": 9.57, "learning_rate": 2.390344698037006e-06, "loss": 0.4783, "step": 11321, "task_loss": 0.22973990440368652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5382809042930603, "epoch": 9.57, "learning_rate": 2.3856485394946936e-06, "loss": 0.5271, "step": 11322, "task_loss": 0.5735762715339661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.645259439945221, "epoch": 9.57, "learning_rate": 2.3809523809523808e-06, "loss": 0.5396, "step": 11323, "task_loss": 0.4066629707813263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.512957751750946, "epoch": 9.57, "learning_rate": 2.376256222410069e-06, "loss": 0.8537, "step": 11324, "task_loss": 0.814938485622406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5392708778381348, "epoch": 9.57, "learning_rate": 2.3715600638677564e-06, "loss": 0.4985, "step": 11325, "task_loss": 0.634757936000824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5486794710159302, "epoch": 9.57, "learning_rate": 2.366863905325444e-06, "loss": 0.563, "step": 11326, "task_loss": 1.3729934692382812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3877533972263336, "epoch": 9.57, "learning_rate": 2.362167746783131e-06, "loss": 0.4811, "step": 11327, "task_loss": 0.07898153364658356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.511390745639801, "epoch": 9.58, "learning_rate": 2.3574715882408192e-06, "loss": 0.5266, "step": 11328, "task_loss": 0.7197743654251099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6811339855194092, "epoch": 9.58, "learning_rate": 2.352775429698507e-06, "loss": 0.4995, "step": 11329, "task_loss": 0.5202093720436096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4027290940284729, "epoch": 9.58, "learning_rate": 2.3480792711561945e-06, "loss": 0.4521, "step": 11330, "task_loss": 1.1581066846847534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48748883605003357, "epoch": 9.58, "learning_rate": 2.3433831126138816e-06, "loss": 0.45, "step": 11331, "task_loss": 0.6179871559143066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39379796385765076, "epoch": 9.58, "learning_rate": 2.3386869540715697e-06, "loss": 0.458, "step": 11332, "task_loss": 0.36207693815231323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5549376010894775, "epoch": 9.58, "learning_rate": 2.3339907955292573e-06, "loss": 0.4775, "step": 11333, "task_loss": 0.7678676843643188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4560735523700714, "epoch": 9.58, "learning_rate": 2.329294636986945e-06, "loss": 0.3875, "step": 11334, "task_loss": 0.20659320056438446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5894907712936401, "epoch": 9.58, "learning_rate": 2.324598478444632e-06, "loss": 0.5415, "step": 11335, "task_loss": 0.5482704043388367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4887601137161255, "epoch": 9.58, "learning_rate": 2.31990231990232e-06, "loss": 0.5113, "step": 11336, "task_loss": 0.3197711408138275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35389626026153564, "epoch": 9.58, "learning_rate": 2.3152061613600077e-06, "loss": 0.4929, "step": 11337, "task_loss": 0.03268735110759735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5310840606689453, "epoch": 9.58, "learning_rate": 2.3105100028176953e-06, "loss": 0.6531, "step": 11338, "task_loss": 1.220650315284729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2346491515636444, "epoch": 9.58, "learning_rate": 2.305813844275383e-06, "loss": 0.3834, "step": 11339, "task_loss": 0.4420134723186493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7331980466842651, "epoch": 9.59, "learning_rate": 2.3011176857330706e-06, "loss": 0.5783, "step": 11340, "task_loss": 0.18451139330863953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.523724377155304, "epoch": 9.59, "learning_rate": 2.296421527190758e-06, "loss": 0.622, "step": 11341, "task_loss": 1.389329195022583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5589215755462646, "epoch": 9.59, "learning_rate": 2.2917253686484458e-06, "loss": 0.6443, "step": 11342, "task_loss": 0.8657656311988831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43964141607284546, "epoch": 9.59, "learning_rate": 2.2870292101061334e-06, "loss": 0.4863, "step": 11343, "task_loss": 0.4785189926624298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5768787264823914, "epoch": 9.59, "learning_rate": 2.282333051563821e-06, "loss": 0.5098, "step": 11344, "task_loss": 1.070552110671997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46499505639076233, "epoch": 9.59, "learning_rate": 2.2776368930215086e-06, "loss": 0.4859, "step": 11345, "task_loss": 0.7278861403465271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6427371501922607, "epoch": 9.59, "learning_rate": 2.272940734479196e-06, "loss": 0.6274, "step": 11346, "task_loss": 0.979988694190979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3634743392467499, "epoch": 9.59, "learning_rate": 2.268244575936884e-06, "loss": 0.4784, "step": 11347, "task_loss": 0.25367221236228943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5737418532371521, "epoch": 9.59, "learning_rate": 2.2635484173945714e-06, "loss": 0.4596, "step": 11348, "task_loss": 1.2101728916168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3761008679866791, "epoch": 9.59, "learning_rate": 2.258852258852259e-06, "loss": 0.5955, "step": 11349, "task_loss": 0.5186026096343994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6712108850479126, "epoch": 9.59, "learning_rate": 2.2541561003099462e-06, "loss": 0.546, "step": 11350, "task_loss": 1.1674824953079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.22443804144859314, "epoch": 9.59, "learning_rate": 2.2494599417676343e-06, "loss": 0.5093, "step": 11351, "task_loss": 0.1910436451435089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6153391003608704, "epoch": 9.6, "learning_rate": 2.244763783225322e-06, "loss": 0.559, "step": 11352, "task_loss": 0.24001070857048035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6334892511367798, "epoch": 9.6, "learning_rate": 2.2400676246830095e-06, "loss": 0.6167, "step": 11353, "task_loss": 0.8446433544158936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.57476407289505, "epoch": 9.6, "learning_rate": 2.2353714661406967e-06, "loss": 0.4768, "step": 11354, "task_loss": 0.5863592028617859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6371542811393738, "epoch": 9.6, "learning_rate": 2.2306753075983847e-06, "loss": 0.6046, "step": 11355, "task_loss": 1.3442143201828003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5077188611030579, "epoch": 9.6, "learning_rate": 2.2259791490560723e-06, "loss": 0.5565, "step": 11356, "task_loss": 1.34006929397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28594329953193665, "epoch": 9.6, "learning_rate": 2.22128299051376e-06, "loss": 0.4441, "step": 11357, "task_loss": 0.9000816345214844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5518999099731445, "epoch": 9.6, "learning_rate": 2.216586831971447e-06, "loss": 0.5262, "step": 11358, "task_loss": 0.5515723824501038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43517962098121643, "epoch": 9.6, "learning_rate": 2.211890673429135e-06, "loss": 0.445, "step": 11359, "task_loss": 0.8697996139526367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5823450684547424, "epoch": 9.6, "learning_rate": 2.2071945148868228e-06, "loss": 0.5219, "step": 11360, "task_loss": 0.7300214767456055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33340218663215637, "epoch": 9.6, "learning_rate": 2.2024983563445104e-06, "loss": 0.5358, "step": 11361, "task_loss": 0.576475977897644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27498751878738403, "epoch": 9.6, "learning_rate": 2.197802197802198e-06, "loss": 0.3161, "step": 11362, "task_loss": 0.2063002735376358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43435466289520264, "epoch": 9.6, "learning_rate": 2.1931060392598856e-06, "loss": 0.4225, "step": 11363, "task_loss": 0.257951945066452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8279045820236206, "epoch": 9.61, "learning_rate": 2.188409880717573e-06, "loss": 0.6623, "step": 11364, "task_loss": 0.9570447206497192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49469470977783203, "epoch": 9.61, "learning_rate": 2.183713722175261e-06, "loss": 0.4161, "step": 11365, "task_loss": 0.4699959456920624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8706459999084473, "epoch": 9.61, "learning_rate": 2.1790175636329484e-06, "loss": 0.7097, "step": 11366, "task_loss": 0.8282628059387207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.487196147441864, "epoch": 9.61, "learning_rate": 2.174321405090636e-06, "loss": 0.4317, "step": 11367, "task_loss": 0.5242340564727783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.25716182589530945, "epoch": 9.61, "learning_rate": 2.1696252465483236e-06, "loss": 0.3736, "step": 11368, "task_loss": 0.6106293201446533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.22057752311229706, "epoch": 9.61, "learning_rate": 2.1649290880060113e-06, "loss": 0.4062, "step": 11369, "task_loss": 0.049295250326395035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3360055387020111, "epoch": 9.61, "learning_rate": 2.160232929463699e-06, "loss": 0.4752, "step": 11370, "task_loss": 0.030150672420859337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5310614109039307, "epoch": 9.61, "learning_rate": 2.1555367709213865e-06, "loss": 0.5028, "step": 11371, "task_loss": 0.19002258777618408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8981598615646362, "epoch": 9.61, "learning_rate": 2.150840612379074e-06, "loss": 0.6326, "step": 11372, "task_loss": 0.9009372591972351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3571709990501404, "epoch": 9.61, "learning_rate": 2.1461444538367617e-06, "loss": 0.4106, "step": 11373, "task_loss": 0.5154172778129578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2619447410106659, "epoch": 9.61, "learning_rate": 2.1414482952944493e-06, "loss": 0.4492, "step": 11374, "task_loss": 0.40657472610473633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32814016938209534, "epoch": 9.61, "learning_rate": 2.136752136752137e-06, "loss": 0.5061, "step": 11375, "task_loss": 0.22110538184642792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3983132243156433, "epoch": 9.62, "learning_rate": 2.1320559782098245e-06, "loss": 0.5108, "step": 11376, "task_loss": 0.6028563976287842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.539918065071106, "epoch": 9.62, "learning_rate": 2.127359819667512e-06, "loss": 0.4842, "step": 11377, "task_loss": 0.8047811985015869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2548607587814331, "epoch": 9.62, "learning_rate": 2.1226636611251998e-06, "loss": 0.3424, "step": 11378, "task_loss": 0.45371749997138977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8250559568405151, "epoch": 9.62, "learning_rate": 2.1179675025828874e-06, "loss": 0.5183, "step": 11379, "task_loss": 0.5444222688674927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6974778771400452, "epoch": 9.62, "learning_rate": 2.113271344040575e-06, "loss": 0.5673, "step": 11380, "task_loss": 0.6716244220733643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4380393326282501, "epoch": 9.62, "learning_rate": 2.1085751854982626e-06, "loss": 0.4849, "step": 11381, "task_loss": 0.6142094731330872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6441769599914551, "epoch": 9.62, "learning_rate": 2.10387902695595e-06, "loss": 0.5908, "step": 11382, "task_loss": 0.6370658874511719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.511991560459137, "epoch": 9.62, "learning_rate": 2.099182868413638e-06, "loss": 0.6043, "step": 11383, "task_loss": 1.286627173423767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43715494871139526, "epoch": 9.62, "learning_rate": 2.0944867098713254e-06, "loss": 0.5558, "step": 11384, "task_loss": 0.7085151672363281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47504785656929016, "epoch": 9.62, "learning_rate": 2.0897905513290126e-06, "loss": 0.4983, "step": 11385, "task_loss": 0.7069205641746521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.776062548160553, "epoch": 9.62, "learning_rate": 2.0850943927867006e-06, "loss": 0.5104, "step": 11386, "task_loss": 0.8017794489860535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37915679812431335, "epoch": 9.63, "learning_rate": 2.0803982342443882e-06, "loss": 0.5633, "step": 11387, "task_loss": 0.5884358882904053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5737254023551941, "epoch": 9.63, "learning_rate": 2.075702075702076e-06, "loss": 0.5822, "step": 11388, "task_loss": 1.1390992403030396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7679910659790039, "epoch": 9.63, "learning_rate": 2.0710059171597635e-06, "loss": 0.5993, "step": 11389, "task_loss": 0.8572799563407898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38416674733161926, "epoch": 9.63, "learning_rate": 2.066309758617451e-06, "loss": 0.3671, "step": 11390, "task_loss": 0.45505815744400024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9478505849838257, "epoch": 9.63, "learning_rate": 2.0616136000751387e-06, "loss": 0.7755, "step": 11391, "task_loss": 1.587512493133545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5952091217041016, "epoch": 9.63, "learning_rate": 2.0569174415328263e-06, "loss": 0.5036, "step": 11392, "task_loss": 0.35364559292793274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2710678279399872, "epoch": 9.63, "learning_rate": 2.052221282990514e-06, "loss": 0.5794, "step": 11393, "task_loss": 0.6814355850219727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5481448173522949, "epoch": 9.63, "learning_rate": 2.0475251244482015e-06, "loss": 0.4183, "step": 11394, "task_loss": 0.8255208730697632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3358798623085022, "epoch": 9.63, "learning_rate": 2.042828965905889e-06, "loss": 0.5538, "step": 11395, "task_loss": 0.17713001370429993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2632208466529846, "epoch": 9.63, "learning_rate": 2.0381328073635767e-06, "loss": 0.4221, "step": 11396, "task_loss": 0.03643917664885521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6972920894622803, "epoch": 9.63, "learning_rate": 2.0334366488212643e-06, "loss": 0.5873, "step": 11397, "task_loss": 0.2945539355278015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6117405295372009, "epoch": 9.63, "learning_rate": 2.028740490278952e-06, "loss": 0.5412, "step": 11398, "task_loss": 0.9292134642601013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8162497878074646, "epoch": 9.64, "learning_rate": 2.0240443317366396e-06, "loss": 0.678, "step": 11399, "task_loss": 0.4524242877960205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30858469009399414, "epoch": 9.64, "learning_rate": 2.019348173194327e-06, "loss": 0.4862, "step": 11400, "task_loss": 0.46161264181137085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4446924924850464, "epoch": 9.64, "learning_rate": 2.0146520146520148e-06, "loss": 0.4958, "step": 11401, "task_loss": 0.4677511751651764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3646039366722107, "epoch": 9.64, "learning_rate": 2.0099558561097024e-06, "loss": 0.4871, "step": 11402, "task_loss": 0.2900379002094269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5827575325965881, "epoch": 9.64, "learning_rate": 2.00525969756739e-06, "loss": 0.4527, "step": 11403, "task_loss": 0.7284025549888611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3891655206680298, "epoch": 9.64, "learning_rate": 2.0005635390250776e-06, "loss": 0.5763, "step": 11404, "task_loss": 0.5918285250663757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46287229657173157, "epoch": 9.64, "learning_rate": 1.9958673804827652e-06, "loss": 0.4535, "step": 11405, "task_loss": 0.6827713847160339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39512404799461365, "epoch": 9.64, "learning_rate": 1.991171221940453e-06, "loss": 0.4824, "step": 11406, "task_loss": 0.3780260682106018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34958314895629883, "epoch": 9.64, "learning_rate": 1.9864750633981404e-06, "loss": 0.6949, "step": 11407, "task_loss": 0.4690275490283966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7290666699409485, "epoch": 9.64, "learning_rate": 1.981778904855828e-06, "loss": 0.6734, "step": 11408, "task_loss": 0.6262463927268982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6450524926185608, "epoch": 9.64, "learning_rate": 1.9770827463135157e-06, "loss": 0.4526, "step": 11409, "task_loss": 0.8084776401519775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5209466218948364, "epoch": 9.64, "learning_rate": 1.9723865877712033e-06, "loss": 0.538, "step": 11410, "task_loss": 0.2751803696155548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3663901090621948, "epoch": 9.65, "learning_rate": 1.967690429228891e-06, "loss": 0.4553, "step": 11411, "task_loss": 0.1201891154050827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3921613097190857, "epoch": 9.65, "learning_rate": 1.9629942706865785e-06, "loss": 0.4083, "step": 11412, "task_loss": 0.8348675966262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5544988512992859, "epoch": 9.65, "learning_rate": 1.958298112144266e-06, "loss": 0.5007, "step": 11413, "task_loss": 0.938339114189148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4500572085380554, "epoch": 9.65, "learning_rate": 1.9536019536019537e-06, "loss": 0.3735, "step": 11414, "task_loss": 0.055150169879198074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37548303604125977, "epoch": 9.65, "learning_rate": 1.9489057950596413e-06, "loss": 0.5311, "step": 11415, "task_loss": 0.69590163230896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43493419885635376, "epoch": 9.65, "learning_rate": 1.944209636517329e-06, "loss": 0.5483, "step": 11416, "task_loss": 0.24356773495674133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4841821789741516, "epoch": 9.65, "learning_rate": 1.9395134779750165e-06, "loss": 0.4659, "step": 11417, "task_loss": 0.46292781829833984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5528687238693237, "epoch": 9.65, "learning_rate": 1.934817319432704e-06, "loss": 0.4117, "step": 11418, "task_loss": 0.2459871470928192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3112121522426605, "epoch": 9.65, "learning_rate": 1.9301211608903918e-06, "loss": 0.5868, "step": 11419, "task_loss": 0.4667634069919586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7542078495025635, "epoch": 9.65, "learning_rate": 1.9254250023480794e-06, "loss": 0.6366, "step": 11420, "task_loss": 0.9502167105674744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5117038488388062, "epoch": 9.65, "learning_rate": 1.920728843805767e-06, "loss": 0.4138, "step": 11421, "task_loss": 0.7222166061401367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3389070928096771, "epoch": 9.65, "learning_rate": 1.9160326852634546e-06, "loss": 0.4318, "step": 11422, "task_loss": 0.07583770155906677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3692683279514313, "epoch": 9.66, "learning_rate": 1.911336526721142e-06, "loss": 0.561, "step": 11423, "task_loss": 0.2854543924331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49918538331985474, "epoch": 9.66, "learning_rate": 1.90664036817883e-06, "loss": 0.4964, "step": 11424, "task_loss": 0.5617024898529053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.549014687538147, "epoch": 9.66, "learning_rate": 1.9019442096365174e-06, "loss": 0.5246, "step": 11425, "task_loss": 1.0923465490341187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3296261727809906, "epoch": 9.66, "learning_rate": 1.897248051094205e-06, "loss": 0.5636, "step": 11426, "task_loss": 0.34999504685401917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4123418927192688, "epoch": 9.66, "learning_rate": 1.8925518925518924e-06, "loss": 0.4474, "step": 11427, "task_loss": 0.5909363031387329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44264698028564453, "epoch": 9.66, "learning_rate": 1.8878557340095805e-06, "loss": 0.5437, "step": 11428, "task_loss": 0.3855496346950531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37073323130607605, "epoch": 9.66, "learning_rate": 1.8831595754672679e-06, "loss": 0.6323, "step": 11429, "task_loss": 1.0449588298797607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46683409810066223, "epoch": 9.66, "learning_rate": 1.8784634169249555e-06, "loss": 0.5529, "step": 11430, "task_loss": 1.0374109745025635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8051499128341675, "epoch": 9.66, "learning_rate": 1.8737672583826429e-06, "loss": 0.5599, "step": 11431, "task_loss": 0.6604653596878052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4702816605567932, "epoch": 9.66, "learning_rate": 1.869071099840331e-06, "loss": 0.4355, "step": 11432, "task_loss": 0.401755154132843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4778931140899658, "epoch": 9.66, "learning_rate": 1.8643749412980183e-06, "loss": 0.4668, "step": 11433, "task_loss": 1.3282161951065063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6465566158294678, "epoch": 9.66, "learning_rate": 1.859678782755706e-06, "loss": 0.6162, "step": 11434, "task_loss": 0.8264754414558411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42370858788490295, "epoch": 9.67, "learning_rate": 1.8549826242133933e-06, "loss": 0.4757, "step": 11435, "task_loss": 0.2752176523208618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3534660041332245, "epoch": 9.67, "learning_rate": 1.8502864656710811e-06, "loss": 0.49, "step": 11436, "task_loss": 0.48003533482551575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.61049485206604, "epoch": 9.67, "learning_rate": 1.8455903071287688e-06, "loss": 0.4793, "step": 11437, "task_loss": 1.111061930656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.23898935317993164, "epoch": 9.67, "learning_rate": 1.8408941485864564e-06, "loss": 0.4251, "step": 11438, "task_loss": 0.13332347571849823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6359784007072449, "epoch": 9.67, "learning_rate": 1.8361979900441438e-06, "loss": 0.5412, "step": 11439, "task_loss": 1.466775894165039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.528633713722229, "epoch": 9.67, "learning_rate": 1.8315018315018316e-06, "loss": 0.5861, "step": 11440, "task_loss": 0.2812385857105255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6423138380050659, "epoch": 9.67, "learning_rate": 1.8268056729595192e-06, "loss": 0.5852, "step": 11441, "task_loss": 0.9537580609321594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3326289653778076, "epoch": 9.67, "learning_rate": 1.8221095144172068e-06, "loss": 0.6265, "step": 11442, "task_loss": 0.5414422154426575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5209968090057373, "epoch": 9.67, "learning_rate": 1.8174133558748946e-06, "loss": 0.5479, "step": 11443, "task_loss": 0.39431944489479065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8632389307022095, "epoch": 9.67, "learning_rate": 1.812717197332582e-06, "loss": 0.7125, "step": 11444, "task_loss": 1.0994718074798584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7477478981018066, "epoch": 9.67, "learning_rate": 1.8080210387902696e-06, "loss": 0.5109, "step": 11445, "task_loss": 1.1523531675338745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45790499448776245, "epoch": 9.67, "learning_rate": 1.8033248802479572e-06, "loss": 0.6308, "step": 11446, "task_loss": 0.42217889428138733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.669195830821991, "epoch": 9.68, "learning_rate": 1.798628721705645e-06, "loss": 0.6086, "step": 11447, "task_loss": 1.3851507902145386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.455788791179657, "epoch": 9.68, "learning_rate": 1.7939325631633325e-06, "loss": 0.4777, "step": 11448, "task_loss": 0.11748513579368591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3722623884677887, "epoch": 9.68, "learning_rate": 1.78923640462102e-06, "loss": 0.5062, "step": 11449, "task_loss": 0.26742619276046753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2201627641916275, "epoch": 9.68, "learning_rate": 1.7845402460787075e-06, "loss": 0.6149, "step": 11450, "task_loss": 0.7989466786384583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41535666584968567, "epoch": 9.68, "learning_rate": 1.7798440875363955e-06, "loss": 0.4795, "step": 11451, "task_loss": 0.9794417023658752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27511340379714966, "epoch": 9.68, "learning_rate": 1.775147928994083e-06, "loss": 0.4937, "step": 11452, "task_loss": 0.38129672408103943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5654014945030212, "epoch": 9.68, "learning_rate": 1.7704517704517705e-06, "loss": 0.5297, "step": 11453, "task_loss": 1.1619906425476074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.659551739692688, "epoch": 9.68, "learning_rate": 1.765755611909458e-06, "loss": 0.4816, "step": 11454, "task_loss": 0.44590631127357483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7365339398384094, "epoch": 9.68, "learning_rate": 1.761059453367146e-06, "loss": 0.6164, "step": 11455, "task_loss": 0.5907418727874756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4557931423187256, "epoch": 9.68, "learning_rate": 1.7563632948248333e-06, "loss": 0.5987, "step": 11456, "task_loss": 0.5244191288948059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8812709450721741, "epoch": 9.68, "learning_rate": 1.751667136282521e-06, "loss": 0.659, "step": 11457, "task_loss": 0.40603041648864746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2979689836502075, "epoch": 9.69, "learning_rate": 1.7469709777402084e-06, "loss": 0.4333, "step": 11458, "task_loss": 0.5775185227394104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5468404293060303, "epoch": 9.69, "learning_rate": 1.7422748191978964e-06, "loss": 0.4091, "step": 11459, "task_loss": 0.18960924446582794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3669789433479309, "epoch": 9.69, "learning_rate": 1.7375786606555838e-06, "loss": 0.5098, "step": 11460, "task_loss": 0.7717580795288086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3639047145843506, "epoch": 9.69, "learning_rate": 1.7328825021132714e-06, "loss": 0.4741, "step": 11461, "task_loss": 1.2979401350021362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3938557505607605, "epoch": 9.69, "learning_rate": 1.7281863435709588e-06, "loss": 0.5513, "step": 11462, "task_loss": 0.5103908777236938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48986607789993286, "epoch": 9.69, "learning_rate": 1.7234901850286468e-06, "loss": 0.5257, "step": 11463, "task_loss": 0.19029080867767334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45809322595596313, "epoch": 9.69, "learning_rate": 1.7187940264863342e-06, "loss": 0.5898, "step": 11464, "task_loss": 0.2553292512893677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6136647462844849, "epoch": 9.69, "learning_rate": 1.7140978679440218e-06, "loss": 0.5839, "step": 11465, "task_loss": 0.8344267010688782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4086958169937134, "epoch": 9.69, "learning_rate": 1.7094017094017097e-06, "loss": 0.4834, "step": 11466, "task_loss": 0.39553430676460266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3581584393978119, "epoch": 9.69, "learning_rate": 1.7047055508593973e-06, "loss": 0.5424, "step": 11467, "task_loss": 0.20823214948177338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.709089994430542, "epoch": 9.69, "learning_rate": 1.7000093923170847e-06, "loss": 0.6412, "step": 11468, "task_loss": 1.0438073873519897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6086224913597107, "epoch": 9.69, "learning_rate": 1.6953132337747723e-06, "loss": 0.5854, "step": 11469, "task_loss": 1.092690110206604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.111971378326416, "epoch": 9.7, "learning_rate": 1.69061707523246e-06, "loss": 0.6212, "step": 11470, "task_loss": 1.2562836408615112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47859475016593933, "epoch": 9.7, "learning_rate": 1.6859209166901477e-06, "loss": 0.5991, "step": 11471, "task_loss": 0.334189772605896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33578816056251526, "epoch": 9.7, "learning_rate": 1.6812247581478351e-06, "loss": 0.4203, "step": 11472, "task_loss": 0.49818506836891174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5086127519607544, "epoch": 9.7, "learning_rate": 1.6765285996055227e-06, "loss": 0.6032, "step": 11473, "task_loss": 0.8147891759872437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44872742891311646, "epoch": 9.7, "learning_rate": 1.6718324410632105e-06, "loss": 0.4625, "step": 11474, "task_loss": 0.1541370302438736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7632561326026917, "epoch": 9.7, "learning_rate": 1.667136282520898e-06, "loss": 0.5877, "step": 11475, "task_loss": 0.5513964891433716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6939810514450073, "epoch": 9.7, "learning_rate": 1.6624401239785856e-06, "loss": 0.5537, "step": 11476, "task_loss": 0.6401389837265015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3860637843608856, "epoch": 9.7, "learning_rate": 1.6577439654362732e-06, "loss": 0.4945, "step": 11477, "task_loss": 0.2748696804046631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33206045627593994, "epoch": 9.7, "learning_rate": 1.653047806893961e-06, "loss": 0.5256, "step": 11478, "task_loss": 0.4516805410385132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6431218385696411, "epoch": 9.7, "learning_rate": 1.6483516483516484e-06, "loss": 0.5229, "step": 11479, "task_loss": 0.8697951436042786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7676568627357483, "epoch": 9.7, "learning_rate": 1.643655489809336e-06, "loss": 0.6034, "step": 11480, "task_loss": 1.118166208267212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.587255597114563, "epoch": 9.7, "learning_rate": 1.6389593312670236e-06, "loss": 0.5289, "step": 11481, "task_loss": 0.9892369508743286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7116389274597168, "epoch": 9.71, "learning_rate": 1.6342631727247114e-06, "loss": 0.5578, "step": 11482, "task_loss": 0.9225607514381409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3698551058769226, "epoch": 9.71, "learning_rate": 1.6295670141823988e-06, "loss": 0.4876, "step": 11483, "task_loss": 1.2351182699203491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6666916608810425, "epoch": 9.71, "learning_rate": 1.6248708556400864e-06, "loss": 0.8062, "step": 11484, "task_loss": 1.1383987665176392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6063796877861023, "epoch": 9.71, "learning_rate": 1.620174697097774e-06, "loss": 0.5094, "step": 11485, "task_loss": 0.7495008111000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4437066614627838, "epoch": 9.71, "learning_rate": 1.6154785385554619e-06, "loss": 0.538, "step": 11486, "task_loss": 1.8862847089767456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5137914419174194, "epoch": 9.71, "learning_rate": 1.6107823800131493e-06, "loss": 0.624, "step": 11487, "task_loss": 0.5614006519317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8059605360031128, "epoch": 9.71, "learning_rate": 1.6060862214708369e-06, "loss": 0.5629, "step": 11488, "task_loss": 1.3687599897384644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5678377151489258, "epoch": 9.71, "learning_rate": 1.6013900629285243e-06, "loss": 0.5325, "step": 11489, "task_loss": 0.5617057085037231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6493019461631775, "epoch": 9.71, "learning_rate": 1.5966939043862123e-06, "loss": 0.7129, "step": 11490, "task_loss": 0.5616776943206787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36767578125, "epoch": 9.71, "learning_rate": 1.5919977458438997e-06, "loss": 0.5281, "step": 11491, "task_loss": 0.32768571376800537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4802798330783844, "epoch": 9.71, "learning_rate": 1.5873015873015873e-06, "loss": 0.4708, "step": 11492, "task_loss": 0.44748640060424805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.495869517326355, "epoch": 9.71, "learning_rate": 1.5826054287592751e-06, "loss": 0.5563, "step": 11493, "task_loss": 0.4884212017059326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7841111421585083, "epoch": 9.72, "learning_rate": 1.5779092702169627e-06, "loss": 0.601, "step": 11494, "task_loss": 1.4325422048568726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3211894929409027, "epoch": 9.72, "learning_rate": 1.5732131116746501e-06, "loss": 0.5537, "step": 11495, "task_loss": 0.7646780610084534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29383453726768494, "epoch": 9.72, "learning_rate": 1.5685169531323378e-06, "loss": 0.5556, "step": 11496, "task_loss": 0.49611806869506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5704091787338257, "epoch": 9.72, "learning_rate": 1.5638207945900256e-06, "loss": 0.534, "step": 11497, "task_loss": 0.9653008580207825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41487741470336914, "epoch": 9.72, "learning_rate": 1.5591246360477132e-06, "loss": 0.5678, "step": 11498, "task_loss": 0.33534494042396545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6436388492584229, "epoch": 9.72, "learning_rate": 1.5544284775054006e-06, "loss": 0.4495, "step": 11499, "task_loss": 0.2991407513618469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4911736249923706, "epoch": 9.72, "learning_rate": 1.5497323189630884e-06, "loss": 0.5615, "step": 11500, "task_loss": 1.0525355339050293 }, { "epoch": 9.72, "eval_accuracy": 0.9061386138613862, "eval_loss": 0.3415951728820801, "eval_runtime": 224.6089, "eval_samples_per_second": 112.418, "eval_steps_per_second": 0.882, "step": 11500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.536419153213501, "epoch": 9.72, "learning_rate": 1.5450361604207758e-06, "loss": 0.5526, "step": 11501, "task_loss": 0.2670529782772064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4322338104248047, "epoch": 9.72, "learning_rate": 1.5403400018784636e-06, "loss": 0.4045, "step": 11502, "task_loss": 0.24652105569839478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.415252685546875, "epoch": 9.72, "learning_rate": 1.535643843336151e-06, "loss": 0.469, "step": 11503, "task_loss": 0.6024048924446106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.587314248085022, "epoch": 9.72, "learning_rate": 1.5309476847938389e-06, "loss": 0.5521, "step": 11504, "task_loss": 0.07074660807847977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.452799916267395, "epoch": 9.72, "learning_rate": 1.5262515262515263e-06, "loss": 0.4388, "step": 11505, "task_loss": 1.0037128925323486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24308723211288452, "epoch": 9.73, "learning_rate": 1.521555367709214e-06, "loss": 0.3355, "step": 11506, "task_loss": 0.145368754863739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7318060994148254, "epoch": 9.73, "learning_rate": 1.5168592091669015e-06, "loss": 0.4642, "step": 11507, "task_loss": 0.5483699440956116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5375053882598877, "epoch": 9.73, "learning_rate": 1.5121630506245893e-06, "loss": 0.5037, "step": 11508, "task_loss": 1.1170408725738525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27880626916885376, "epoch": 9.73, "learning_rate": 1.5074668920822767e-06, "loss": 0.4055, "step": 11509, "task_loss": 0.14278464019298553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5248398780822754, "epoch": 9.73, "learning_rate": 1.5027707335399643e-06, "loss": 0.6966, "step": 11510, "task_loss": 0.5878928303718567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.661011815071106, "epoch": 9.73, "learning_rate": 1.498074574997652e-06, "loss": 0.6803, "step": 11511, "task_loss": 0.9294087290763855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5192436575889587, "epoch": 9.73, "learning_rate": 1.4933784164553395e-06, "loss": 0.4903, "step": 11512, "task_loss": 0.2373286634683609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5579653978347778, "epoch": 9.73, "learning_rate": 1.4886822579130271e-06, "loss": 0.6285, "step": 11513, "task_loss": 0.23283642530441284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3218482732772827, "epoch": 9.73, "learning_rate": 1.4839860993707147e-06, "loss": 0.5535, "step": 11514, "task_loss": 0.12274415045976639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2762157618999481, "epoch": 9.73, "learning_rate": 1.4792899408284024e-06, "loss": 0.381, "step": 11515, "task_loss": 0.3776683509349823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40191030502319336, "epoch": 9.73, "learning_rate": 1.47459378228609e-06, "loss": 0.456, "step": 11516, "task_loss": 0.23304127156734467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40373021364212036, "epoch": 9.73, "learning_rate": 1.4698976237437776e-06, "loss": 0.4648, "step": 11517, "task_loss": 1.3059436082839966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3305944800376892, "epoch": 9.74, "learning_rate": 1.4652014652014652e-06, "loss": 0.482, "step": 11518, "task_loss": 0.8826178312301636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5002104043960571, "epoch": 9.74, "learning_rate": 1.460505306659153e-06, "loss": 0.5864, "step": 11519, "task_loss": 0.4279315173625946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.23471489548683167, "epoch": 9.74, "learning_rate": 1.4558091481168404e-06, "loss": 0.4815, "step": 11520, "task_loss": 0.06021621823310852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38295620679855347, "epoch": 9.74, "learning_rate": 1.4511129895745282e-06, "loss": 0.4574, "step": 11521, "task_loss": 0.23298221826553345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7968490123748779, "epoch": 9.74, "learning_rate": 1.4464168310322156e-06, "loss": 0.6352, "step": 11522, "task_loss": 0.4866791069507599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6151580214500427, "epoch": 9.74, "learning_rate": 1.4417206724899034e-06, "loss": 0.5375, "step": 11523, "task_loss": 0.5411332249641418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.367996484041214, "epoch": 9.74, "learning_rate": 1.4370245139475908e-06, "loss": 0.4891, "step": 11524, "task_loss": 0.5383023023605347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5294502377510071, "epoch": 9.74, "learning_rate": 1.4323283554052787e-06, "loss": 0.5084, "step": 11525, "task_loss": 0.923517107963562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5568013787269592, "epoch": 9.74, "learning_rate": 1.427632196862966e-06, "loss": 0.5484, "step": 11526, "task_loss": 0.8762422800064087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6286636590957642, "epoch": 9.74, "learning_rate": 1.4229360383206539e-06, "loss": 0.5498, "step": 11527, "task_loss": 0.4279549717903137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4115074872970581, "epoch": 9.74, "learning_rate": 1.4182398797783413e-06, "loss": 0.5179, "step": 11528, "task_loss": 1.3315378427505493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46236300468444824, "epoch": 9.75, "learning_rate": 1.4135437212360291e-06, "loss": 0.4249, "step": 11529, "task_loss": 0.46879643201828003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5736899375915527, "epoch": 9.75, "learning_rate": 1.4088475626937165e-06, "loss": 0.4196, "step": 11530, "task_loss": 0.48221516609191895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4326358437538147, "epoch": 9.75, "learning_rate": 1.4041514041514043e-06, "loss": 0.4635, "step": 11531, "task_loss": 0.6761982440948486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3569113612174988, "epoch": 9.75, "learning_rate": 1.3994552456090917e-06, "loss": 0.5076, "step": 11532, "task_loss": 0.3585807681083679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8517415523529053, "epoch": 9.75, "learning_rate": 1.3947590870667795e-06, "loss": 0.5814, "step": 11533, "task_loss": 1.023654818534851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3766542673110962, "epoch": 9.75, "learning_rate": 1.390062928524467e-06, "loss": 0.5035, "step": 11534, "task_loss": 0.20044834911823273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5997095108032227, "epoch": 9.75, "learning_rate": 1.3853667699821548e-06, "loss": 0.4727, "step": 11535, "task_loss": 1.148977518081665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3635013699531555, "epoch": 9.75, "learning_rate": 1.3806706114398422e-06, "loss": 0.4811, "step": 11536, "task_loss": 0.49623987078666687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5407978296279907, "epoch": 9.75, "learning_rate": 1.37597445289753e-06, "loss": 0.5485, "step": 11537, "task_loss": 0.9316269755363464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.0843470096588135, "epoch": 9.75, "learning_rate": 1.3712782943552174e-06, "loss": 0.7016, "step": 11538, "task_loss": 1.409144401550293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4879947006702423, "epoch": 9.75, "learning_rate": 1.3665821358129052e-06, "loss": 0.5649, "step": 11539, "task_loss": 0.16611556708812714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38130074739456177, "epoch": 9.75, "learning_rate": 1.3618859772705926e-06, "loss": 0.5176, "step": 11540, "task_loss": 0.7015112638473511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3969789743423462, "epoch": 9.76, "learning_rate": 1.3571898187282804e-06, "loss": 0.6118, "step": 11541, "task_loss": 0.27465203404426575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5651055574417114, "epoch": 9.76, "learning_rate": 1.3524936601859678e-06, "loss": 0.6577, "step": 11542, "task_loss": 1.5608183145523071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43580275774002075, "epoch": 9.76, "learning_rate": 1.3477975016436557e-06, "loss": 0.5016, "step": 11543, "task_loss": 0.6279470324516296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33443325757980347, "epoch": 9.76, "learning_rate": 1.3431013431013433e-06, "loss": 0.4099, "step": 11544, "task_loss": 0.7880122661590576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47935429215431213, "epoch": 9.76, "learning_rate": 1.3384051845590309e-06, "loss": 0.4684, "step": 11545, "task_loss": 0.7952564358711243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6020100116729736, "epoch": 9.76, "learning_rate": 1.3337090260167185e-06, "loss": 0.5943, "step": 11546, "task_loss": 0.8855083584785461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4070062041282654, "epoch": 9.76, "learning_rate": 1.329012867474406e-06, "loss": 0.4909, "step": 11547, "task_loss": 0.550168514251709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4402919411659241, "epoch": 9.76, "learning_rate": 1.3243167089320937e-06, "loss": 0.5212, "step": 11548, "task_loss": 0.20472452044487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5525937080383301, "epoch": 9.76, "learning_rate": 1.319620550389781e-06, "loss": 0.5254, "step": 11549, "task_loss": 0.8967655301094055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7250456809997559, "epoch": 9.76, "learning_rate": 1.314924391847469e-06, "loss": 0.6108, "step": 11550, "task_loss": 1.1153883934020996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5867980718612671, "epoch": 9.76, "learning_rate": 1.3102282333051563e-06, "loss": 0.5058, "step": 11551, "task_loss": 0.5073431134223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5305205583572388, "epoch": 9.76, "learning_rate": 1.3055320747628441e-06, "loss": 0.5932, "step": 11552, "task_loss": 1.1645997762680054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3018990755081177, "epoch": 9.77, "learning_rate": 1.3008359162205315e-06, "loss": 0.4551, "step": 11553, "task_loss": 1.0138224363327026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4449269771575928, "epoch": 9.77, "learning_rate": 1.2961397576782194e-06, "loss": 0.4017, "step": 11554, "task_loss": 0.6882416009902954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.626630961894989, "epoch": 9.77, "learning_rate": 1.2914435991359068e-06, "loss": 0.4924, "step": 11555, "task_loss": 0.7938455939292908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5032488107681274, "epoch": 9.77, "learning_rate": 1.2867474405935946e-06, "loss": 0.4821, "step": 11556, "task_loss": 0.38939177989959717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7005398869514465, "epoch": 9.77, "learning_rate": 1.282051282051282e-06, "loss": 0.6017, "step": 11557, "task_loss": 0.7876736521720886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8690314292907715, "epoch": 9.77, "learning_rate": 1.2773551235089698e-06, "loss": 0.7441, "step": 11558, "task_loss": 0.6200482249259949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5403956174850464, "epoch": 9.77, "learning_rate": 1.2726589649666572e-06, "loss": 0.6334, "step": 11559, "task_loss": 0.8452075123786926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4016724228858948, "epoch": 9.77, "learning_rate": 1.267962806424345e-06, "loss": 0.4887, "step": 11560, "task_loss": 0.4933372437953949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6579176187515259, "epoch": 9.77, "learning_rate": 1.2632666478820324e-06, "loss": 0.4528, "step": 11561, "task_loss": 0.7352555394172668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5601887106895447, "epoch": 9.77, "learning_rate": 1.2585704893397202e-06, "loss": 0.7422, "step": 11562, "task_loss": 0.3754863440990448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4142143130302429, "epoch": 9.77, "learning_rate": 1.2538743307974076e-06, "loss": 0.5369, "step": 11563, "task_loss": 0.9194085597991943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44838717579841614, "epoch": 9.77, "learning_rate": 1.2491781722550955e-06, "loss": 0.4983, "step": 11564, "task_loss": 0.18820162117481232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6209500432014465, "epoch": 9.78, "learning_rate": 1.2444820137127829e-06, "loss": 0.5916, "step": 11565, "task_loss": 0.7315186858177185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3877983093261719, "epoch": 9.78, "learning_rate": 1.2397858551704707e-06, "loss": 0.3557, "step": 11566, "task_loss": 0.6803447008132935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33146870136260986, "epoch": 9.78, "learning_rate": 1.235089696628158e-06, "loss": 0.5408, "step": 11567, "task_loss": 0.3148934841156006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.571172297000885, "epoch": 9.78, "learning_rate": 1.230393538085846e-06, "loss": 0.4837, "step": 11568, "task_loss": 1.6030389070510864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5889254212379456, "epoch": 9.78, "learning_rate": 1.2256973795435333e-06, "loss": 0.5397, "step": 11569, "task_loss": 1.0721043348312378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.544925332069397, "epoch": 9.78, "learning_rate": 1.2210012210012211e-06, "loss": 0.6219, "step": 11570, "task_loss": 1.689197063446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6150897741317749, "epoch": 9.78, "learning_rate": 1.2163050624589087e-06, "loss": 0.4739, "step": 11571, "task_loss": 0.336158812046051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9247380495071411, "epoch": 9.78, "learning_rate": 1.2116089039165963e-06, "loss": 0.688, "step": 11572, "task_loss": 1.2681623697280884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26835328340530396, "epoch": 9.78, "learning_rate": 1.206912745374284e-06, "loss": 0.3035, "step": 11573, "task_loss": 0.18439337611198425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4227977693080902, "epoch": 9.78, "learning_rate": 1.2022165868319716e-06, "loss": 0.4274, "step": 11574, "task_loss": 0.7617286443710327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4625522494316101, "epoch": 9.78, "learning_rate": 1.1975204282896592e-06, "loss": 0.4679, "step": 11575, "task_loss": 0.4170764684677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5006260275840759, "epoch": 9.78, "learning_rate": 1.1928242697473468e-06, "loss": 0.5522, "step": 11576, "task_loss": 1.0208497047424316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3446034789085388, "epoch": 9.79, "learning_rate": 1.1881281112050344e-06, "loss": 0.4832, "step": 11577, "task_loss": 0.5174049139022827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5289309024810791, "epoch": 9.79, "learning_rate": 1.183431952662722e-06, "loss": 0.4853, "step": 11578, "task_loss": 0.23772235214710236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6487652063369751, "epoch": 9.79, "learning_rate": 1.1787357941204096e-06, "loss": 0.5435, "step": 11579, "task_loss": 0.5900980830192566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5232130289077759, "epoch": 9.79, "learning_rate": 1.1740396355780972e-06, "loss": 0.5117, "step": 11580, "task_loss": 0.7497355937957764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3920536935329437, "epoch": 9.79, "learning_rate": 1.1693434770357848e-06, "loss": 0.5625, "step": 11581, "task_loss": 0.3917301893234253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5689412355422974, "epoch": 9.79, "learning_rate": 1.1646473184934725e-06, "loss": 0.4941, "step": 11582, "task_loss": 0.931212842464447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3884715437889099, "epoch": 9.79, "learning_rate": 1.15995115995116e-06, "loss": 0.5843, "step": 11583, "task_loss": 0.08893343806266785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6844034194946289, "epoch": 9.79, "learning_rate": 1.1552550014088477e-06, "loss": 0.5474, "step": 11584, "task_loss": 0.8289682865142822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46304529905319214, "epoch": 9.79, "learning_rate": 1.1505588428665353e-06, "loss": 0.5426, "step": 11585, "task_loss": 0.3210832476615906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6277182102203369, "epoch": 9.79, "learning_rate": 1.1458626843242229e-06, "loss": 0.5495, "step": 11586, "task_loss": 0.5653250217437744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44540345668792725, "epoch": 9.79, "learning_rate": 1.1411665257819105e-06, "loss": 0.5444, "step": 11587, "task_loss": 1.1288255453109741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44795167446136475, "epoch": 9.79, "learning_rate": 1.136470367239598e-06, "loss": 0.4596, "step": 11588, "task_loss": 0.40191465616226196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6589836478233337, "epoch": 9.8, "learning_rate": 1.1317742086972857e-06, "loss": 0.5359, "step": 11589, "task_loss": 0.5444157123565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8617986440658569, "epoch": 9.8, "learning_rate": 1.1270780501549731e-06, "loss": 0.6437, "step": 11590, "task_loss": 0.6680616736412048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26637667417526245, "epoch": 9.8, "learning_rate": 1.122381891612661e-06, "loss": 0.5383, "step": 11591, "task_loss": 0.14676056802272797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20641061663627625, "epoch": 9.8, "learning_rate": 1.1176857330703483e-06, "loss": 0.4396, "step": 11592, "task_loss": 0.45607540011405945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4385773241519928, "epoch": 9.8, "learning_rate": 1.1129895745280362e-06, "loss": 0.4716, "step": 11593, "task_loss": 0.2028522789478302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5597678422927856, "epoch": 9.8, "learning_rate": 1.1082934159857236e-06, "loss": 0.5329, "step": 11594, "task_loss": 0.9714021682739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44276562333106995, "epoch": 9.8, "learning_rate": 1.1035972574434114e-06, "loss": 0.4056, "step": 11595, "task_loss": 0.37057822942733765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36304810643196106, "epoch": 9.8, "learning_rate": 1.098901098901099e-06, "loss": 0.5066, "step": 11596, "task_loss": 2.0307114124298096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3773888647556305, "epoch": 9.8, "learning_rate": 1.0942049403587866e-06, "loss": 0.4297, "step": 11597, "task_loss": 0.558975100517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3911738991737366, "epoch": 9.8, "learning_rate": 1.0895087818164742e-06, "loss": 0.6273, "step": 11598, "task_loss": 0.9880772829055786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3108189105987549, "epoch": 9.8, "learning_rate": 1.0848126232741618e-06, "loss": 0.5208, "step": 11599, "task_loss": 0.5293310880661011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31946343183517456, "epoch": 9.81, "learning_rate": 1.0801164647318494e-06, "loss": 0.5207, "step": 11600, "task_loss": 0.28310585021972656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44380831718444824, "epoch": 9.81, "learning_rate": 1.075420306189537e-06, "loss": 0.562, "step": 11601, "task_loss": 0.5917456746101379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4862491488456726, "epoch": 9.81, "learning_rate": 1.0707241476472247e-06, "loss": 0.4929, "step": 11602, "task_loss": 0.15017296373844147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7555322647094727, "epoch": 9.81, "learning_rate": 1.0660279891049123e-06, "loss": 0.6016, "step": 11603, "task_loss": 0.17325359582901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.36063718795776367, "epoch": 9.81, "learning_rate": 1.0613318305625999e-06, "loss": 0.4685, "step": 11604, "task_loss": 0.4569324553012848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.34156954288482666, "epoch": 9.81, "learning_rate": 1.0566356720202875e-06, "loss": 0.5491, "step": 11605, "task_loss": 0.24537602066993713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46593397855758667, "epoch": 9.81, "learning_rate": 1.051939513477975e-06, "loss": 0.5849, "step": 11606, "task_loss": 0.423801451921463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33818531036376953, "epoch": 9.81, "learning_rate": 1.0472433549356627e-06, "loss": 0.491, "step": 11607, "task_loss": 0.5609651207923889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5181142091751099, "epoch": 9.81, "learning_rate": 1.0425471963933503e-06, "loss": 0.6, "step": 11608, "task_loss": 0.5242823362350464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.267337828874588, "epoch": 9.81, "learning_rate": 1.037851037851038e-06, "loss": 0.5647, "step": 11609, "task_loss": 0.26684239506721497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4985196590423584, "epoch": 9.81, "learning_rate": 1.0331548793087255e-06, "loss": 0.4883, "step": 11610, "task_loss": 0.6232138276100159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27762371301651, "epoch": 9.81, "learning_rate": 1.0284587207664131e-06, "loss": 0.4237, "step": 11611, "task_loss": 0.48498740792274475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.28637561202049255, "epoch": 9.82, "learning_rate": 1.0237625622241008e-06, "loss": 0.5427, "step": 11612, "task_loss": 0.1969769448041916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4311421513557434, "epoch": 9.82, "learning_rate": 1.0190664036817884e-06, "loss": 0.4148, "step": 11613, "task_loss": 0.38214215636253357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45444947481155396, "epoch": 9.82, "learning_rate": 1.014370245139476e-06, "loss": 0.4547, "step": 11614, "task_loss": 0.9136221408843994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5215544104576111, "epoch": 9.82, "learning_rate": 1.0096740865971636e-06, "loss": 0.5983, "step": 11615, "task_loss": 0.8444485664367676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.27286145091056824, "epoch": 9.82, "learning_rate": 1.0049779280548512e-06, "loss": 0.4751, "step": 11616, "task_loss": 0.7143582701683044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38364529609680176, "epoch": 9.82, "learning_rate": 1.0002817695125388e-06, "loss": 0.462, "step": 11617, "task_loss": 0.11121365427970886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5070534944534302, "epoch": 9.82, "learning_rate": 9.955856109702264e-07, "loss": 0.4024, "step": 11618, "task_loss": 0.7544901967048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6013320088386536, "epoch": 9.82, "learning_rate": 9.90889452427914e-07, "loss": 0.4578, "step": 11619, "task_loss": 0.6947131156921387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35890811681747437, "epoch": 9.82, "learning_rate": 9.861932938856016e-07, "loss": 0.4689, "step": 11620, "task_loss": 1.1796942949295044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5504288077354431, "epoch": 9.82, "learning_rate": 9.814971353432892e-07, "loss": 0.6366, "step": 11621, "task_loss": 0.6118441820144653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7418869733810425, "epoch": 9.82, "learning_rate": 9.768009768009769e-07, "loss": 0.5803, "step": 11622, "task_loss": 0.48447319865226746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4025713801383972, "epoch": 9.82, "learning_rate": 9.721048182586645e-07, "loss": 0.4938, "step": 11623, "task_loss": 0.4145151376724243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5665604472160339, "epoch": 9.83, "learning_rate": 9.67408659716352e-07, "loss": 0.4787, "step": 11624, "task_loss": 1.0815913677215576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3895834982395172, "epoch": 9.83, "learning_rate": 9.627125011740397e-07, "loss": 0.5279, "step": 11625, "task_loss": 0.2757743299007416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.511818528175354, "epoch": 9.83, "learning_rate": 9.580163426317273e-07, "loss": 0.5422, "step": 11626, "task_loss": 0.9653855562210083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45784229040145874, "epoch": 9.83, "learning_rate": 9.53320184089415e-07, "loss": 0.4883, "step": 11627, "task_loss": 0.582007646560669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3749268651008606, "epoch": 9.83, "learning_rate": 9.486240255471025e-07, "loss": 0.5342, "step": 11628, "task_loss": 0.9960128664970398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4508403539657593, "epoch": 9.83, "learning_rate": 9.439278670047902e-07, "loss": 0.4836, "step": 11629, "task_loss": 1.097683310508728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6140660643577576, "epoch": 9.83, "learning_rate": 9.392317084624777e-07, "loss": 0.5035, "step": 11630, "task_loss": 0.41548144817352295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37125739455223083, "epoch": 9.83, "learning_rate": 9.345355499201655e-07, "loss": 0.4768, "step": 11631, "task_loss": 0.4238901436328888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6871166825294495, "epoch": 9.83, "learning_rate": 9.29839391377853e-07, "loss": 0.5193, "step": 11632, "task_loss": 0.49430394172668457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4752940535545349, "epoch": 9.83, "learning_rate": 9.251432328355406e-07, "loss": 0.3825, "step": 11633, "task_loss": 0.20726190507411957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6051623821258545, "epoch": 9.83, "learning_rate": 9.204470742932282e-07, "loss": 0.5746, "step": 11634, "task_loss": 0.4666341543197632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40453529357910156, "epoch": 9.83, "learning_rate": 9.157509157509158e-07, "loss": 0.4555, "step": 11635, "task_loss": 0.1532047539949417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45161616802215576, "epoch": 9.84, "learning_rate": 9.110547572086034e-07, "loss": 0.5351, "step": 11636, "task_loss": 0.6374974846839905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33388155698776245, "epoch": 9.84, "learning_rate": 9.06358598666291e-07, "loss": 0.3217, "step": 11637, "task_loss": 1.0119853019714355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7127701044082642, "epoch": 9.84, "learning_rate": 9.016624401239786e-07, "loss": 0.5168, "step": 11638, "task_loss": 0.9383933544158936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2674983739852905, "epoch": 9.84, "learning_rate": 8.969662815816662e-07, "loss": 0.4574, "step": 11639, "task_loss": 0.26306337118148804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8469561338424683, "epoch": 9.84, "learning_rate": 8.922701230393537e-07, "loss": 0.4814, "step": 11640, "task_loss": 0.7896645069122314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37242329120635986, "epoch": 9.84, "learning_rate": 8.875739644970415e-07, "loss": 0.5722, "step": 11641, "task_loss": 0.6436479091644287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6010993123054504, "epoch": 9.84, "learning_rate": 8.82877805954729e-07, "loss": 0.6386, "step": 11642, "task_loss": 0.21855804324150085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.351081907749176, "epoch": 9.84, "learning_rate": 8.781816474124167e-07, "loss": 0.3809, "step": 11643, "task_loss": 0.1526072919368744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4876610338687897, "epoch": 9.84, "learning_rate": 8.734854888701042e-07, "loss": 0.4055, "step": 11644, "task_loss": 0.5989089608192444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5382118225097656, "epoch": 9.84, "learning_rate": 8.687893303277919e-07, "loss": 0.4253, "step": 11645, "task_loss": 1.0953598022460938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5348849296569824, "epoch": 9.84, "learning_rate": 8.640931717854794e-07, "loss": 0.7802, "step": 11646, "task_loss": 0.3564562499523163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3404490351676941, "epoch": 9.84, "learning_rate": 8.593970132431671e-07, "loss": 0.4032, "step": 11647, "task_loss": 0.27700087428092957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3653859496116638, "epoch": 9.85, "learning_rate": 8.547008547008548e-07, "loss": 0.4001, "step": 11648, "task_loss": 0.3649093806743622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.30619192123413086, "epoch": 9.85, "learning_rate": 8.500046961585423e-07, "loss": 0.4533, "step": 11649, "task_loss": 0.20874018967151642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.537000298500061, "epoch": 9.85, "learning_rate": 8.4530853761623e-07, "loss": 0.4917, "step": 11650, "task_loss": 1.7625828981399536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7100646495819092, "epoch": 9.85, "learning_rate": 8.406123790739176e-07, "loss": 0.5398, "step": 11651, "task_loss": 0.7236325740814209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45815837383270264, "epoch": 9.85, "learning_rate": 8.359162205316053e-07, "loss": 0.4501, "step": 11652, "task_loss": 1.3020293712615967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5586444139480591, "epoch": 9.85, "learning_rate": 8.312200619892928e-07, "loss": 0.5558, "step": 11653, "task_loss": 0.501620352268219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46564939618110657, "epoch": 9.85, "learning_rate": 8.265239034469805e-07, "loss": 0.5335, "step": 11654, "task_loss": 1.0236791372299194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5266822576522827, "epoch": 9.85, "learning_rate": 8.21827744904668e-07, "loss": 0.5012, "step": 11655, "task_loss": 0.9320570826530457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.615410566329956, "epoch": 9.85, "learning_rate": 8.171315863623557e-07, "loss": 0.4802, "step": 11656, "task_loss": 1.0972665548324585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6242597699165344, "epoch": 9.85, "learning_rate": 8.124354278200432e-07, "loss": 0.6379, "step": 11657, "task_loss": 0.28658729791641235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6801984906196594, "epoch": 9.85, "learning_rate": 8.077392692777309e-07, "loss": 0.6206, "step": 11658, "task_loss": 0.8452848792076111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41441917419433594, "epoch": 9.85, "learning_rate": 8.030431107354184e-07, "loss": 0.5874, "step": 11659, "task_loss": 0.7407779693603516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5799447298049927, "epoch": 9.86, "learning_rate": 7.983469521931062e-07, "loss": 0.5392, "step": 11660, "task_loss": 0.9250175952911377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6556868553161621, "epoch": 9.86, "learning_rate": 7.936507936507937e-07, "loss": 0.4851, "step": 11661, "task_loss": 1.70505952835083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4861195683479309, "epoch": 9.86, "learning_rate": 7.889546351084814e-07, "loss": 0.4204, "step": 11662, "task_loss": 0.62673020362854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6072658896446228, "epoch": 9.86, "learning_rate": 7.842584765661689e-07, "loss": 0.4895, "step": 11663, "task_loss": 0.9930412173271179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4650421142578125, "epoch": 9.86, "learning_rate": 7.795623180238566e-07, "loss": 0.5628, "step": 11664, "task_loss": 0.6888954639434814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.38302159309387207, "epoch": 9.86, "learning_rate": 7.748661594815442e-07, "loss": 0.5062, "step": 11665, "task_loss": 0.23947152495384216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5047083497047424, "epoch": 9.86, "learning_rate": 7.701700009392318e-07, "loss": 0.4255, "step": 11666, "task_loss": 1.1020257472991943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7551395893096924, "epoch": 9.86, "learning_rate": 7.654738423969194e-07, "loss": 0.5635, "step": 11667, "task_loss": 1.1021616458892822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32474803924560547, "epoch": 9.86, "learning_rate": 7.60777683854607e-07, "loss": 0.532, "step": 11668, "task_loss": 0.7595739364624023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8846448659896851, "epoch": 9.86, "learning_rate": 7.560815253122946e-07, "loss": 0.5088, "step": 11669, "task_loss": 0.9265366792678833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49078765511512756, "epoch": 9.86, "learning_rate": 7.513853667699822e-07, "loss": 0.5372, "step": 11670, "task_loss": 0.651714563369751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4609566628932953, "epoch": 9.87, "learning_rate": 7.466892082276698e-07, "loss": 0.4942, "step": 11671, "task_loss": 0.15753237903118134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.543562650680542, "epoch": 9.87, "learning_rate": 7.419930496853574e-07, "loss": 0.4181, "step": 11672, "task_loss": 0.6874558925628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45105576515197754, "epoch": 9.87, "learning_rate": 7.37296891143045e-07, "loss": 0.458, "step": 11673, "task_loss": 1.1782443523406982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6178381443023682, "epoch": 9.87, "learning_rate": 7.326007326007326e-07, "loss": 0.6003, "step": 11674, "task_loss": 1.2674778699874878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7507555484771729, "epoch": 9.87, "learning_rate": 7.279045740584202e-07, "loss": 0.5809, "step": 11675, "task_loss": 1.4117413759231567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5576955676078796, "epoch": 9.87, "learning_rate": 7.232084155161078e-07, "loss": 0.6156, "step": 11676, "task_loss": 0.996812105178833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3105067014694214, "epoch": 9.87, "learning_rate": 7.185122569737954e-07, "loss": 0.6004, "step": 11677, "task_loss": 0.0919790044426918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5435044169425964, "epoch": 9.87, "learning_rate": 7.13816098431483e-07, "loss": 0.4457, "step": 11678, "task_loss": 0.6237481832504272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4702163338661194, "epoch": 9.87, "learning_rate": 7.091199398891706e-07, "loss": 0.6088, "step": 11679, "task_loss": 0.8574629426002502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4506922960281372, "epoch": 9.87, "learning_rate": 7.044237813468583e-07, "loss": 0.5515, "step": 11680, "task_loss": 0.6272512674331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5071322917938232, "epoch": 9.87, "learning_rate": 6.997276228045459e-07, "loss": 0.4835, "step": 11681, "task_loss": 0.22918638586997986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40236300230026245, "epoch": 9.87, "learning_rate": 6.950314642622335e-07, "loss": 0.4981, "step": 11682, "task_loss": 0.1864151507616043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42928484082221985, "epoch": 9.88, "learning_rate": 6.903353057199211e-07, "loss": 0.4935, "step": 11683, "task_loss": 0.5780230760574341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6977165341377258, "epoch": 9.88, "learning_rate": 6.856391471776087e-07, "loss": 0.5043, "step": 11684, "task_loss": 0.9205551147460938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37538614869117737, "epoch": 9.88, "learning_rate": 6.809429886352963e-07, "loss": 0.6094, "step": 11685, "task_loss": 0.9517297744750977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7041528224945068, "epoch": 9.88, "learning_rate": 6.762468300929839e-07, "loss": 0.4238, "step": 11686, "task_loss": 0.3759898245334625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4184136986732483, "epoch": 9.88, "learning_rate": 6.715506715506716e-07, "loss": 0.4543, "step": 11687, "task_loss": 0.17876768112182617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.47411882877349854, "epoch": 9.88, "learning_rate": 6.668545130083592e-07, "loss": 0.5069, "step": 11688, "task_loss": 0.8441336750984192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6882756352424622, "epoch": 9.88, "learning_rate": 6.621583544660469e-07, "loss": 0.5669, "step": 11689, "task_loss": 0.892108678817749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.20733234286308289, "epoch": 9.88, "learning_rate": 6.574621959237345e-07, "loss": 0.3909, "step": 11690, "task_loss": 0.1266690343618393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4415067434310913, "epoch": 9.88, "learning_rate": 6.527660373814221e-07, "loss": 0.4059, "step": 11691, "task_loss": 0.8299891352653503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.24145695567131042, "epoch": 9.88, "learning_rate": 6.480698788391097e-07, "loss": 0.5932, "step": 11692, "task_loss": 1.1455787420272827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3544795513153076, "epoch": 9.88, "learning_rate": 6.433737202967973e-07, "loss": 0.5158, "step": 11693, "task_loss": 0.520611047744751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.374389111995697, "epoch": 9.88, "learning_rate": 6.386775617544849e-07, "loss": 0.4797, "step": 11694, "task_loss": 0.7371553182601929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2623668611049652, "epoch": 9.89, "learning_rate": 6.339814032121725e-07, "loss": 0.4986, "step": 11695, "task_loss": 0.1754637509584427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5546326637268066, "epoch": 9.89, "learning_rate": 6.292852446698601e-07, "loss": 0.541, "step": 11696, "task_loss": 0.5206983089447021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5267376899719238, "epoch": 9.89, "learning_rate": 6.245890861275477e-07, "loss": 0.4997, "step": 11697, "task_loss": 0.49878057837486267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4991374611854553, "epoch": 9.89, "learning_rate": 6.198929275852353e-07, "loss": 0.5526, "step": 11698, "task_loss": 0.8374083042144775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5425406694412231, "epoch": 9.89, "learning_rate": 6.15196769042923e-07, "loss": 0.4969, "step": 11699, "task_loss": 0.3044191896915436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37938475608825684, "epoch": 9.89, "learning_rate": 6.105006105006106e-07, "loss": 0.4241, "step": 11700, "task_loss": 0.3917592465877533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.29360467195510864, "epoch": 9.89, "learning_rate": 6.058044519582982e-07, "loss": 0.3404, "step": 11701, "task_loss": 1.0668026208877563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4008737802505493, "epoch": 9.89, "learning_rate": 6.011082934159858e-07, "loss": 0.4621, "step": 11702, "task_loss": 0.45861732959747314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45021843910217285, "epoch": 9.89, "learning_rate": 5.964121348736734e-07, "loss": 0.5577, "step": 11703, "task_loss": 0.8075315952301025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6247235536575317, "epoch": 9.89, "learning_rate": 5.91715976331361e-07, "loss": 0.59, "step": 11704, "task_loss": 1.1920844316482544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5530127286911011, "epoch": 9.89, "learning_rate": 5.870198177890486e-07, "loss": 0.6956, "step": 11705, "task_loss": 0.5629008412361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3760971426963806, "epoch": 9.89, "learning_rate": 5.823236592467362e-07, "loss": 0.5614, "step": 11706, "task_loss": 0.15601195394992828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6159166097640991, "epoch": 9.9, "learning_rate": 5.776275007044238e-07, "loss": 0.597, "step": 11707, "task_loss": 0.7166745662689209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5830349326133728, "epoch": 9.9, "learning_rate": 5.729313421621114e-07, "loss": 0.614, "step": 11708, "task_loss": 1.4512584209442139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5437192320823669, "epoch": 9.9, "learning_rate": 5.68235183619799e-07, "loss": 0.7721, "step": 11709, "task_loss": 0.5353873372077942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5190298557281494, "epoch": 9.9, "learning_rate": 5.635390250774866e-07, "loss": 0.6695, "step": 11710, "task_loss": 0.8524925708770752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6376829743385315, "epoch": 9.9, "learning_rate": 5.588428665351742e-07, "loss": 0.6186, "step": 11711, "task_loss": 1.306397557258606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2374061495065689, "epoch": 9.9, "learning_rate": 5.541467079928618e-07, "loss": 0.4345, "step": 11712, "task_loss": 0.46973755955696106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7256037592887878, "epoch": 9.9, "learning_rate": 5.494505494505495e-07, "loss": 0.6185, "step": 11713, "task_loss": 0.5206435322761536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5757043361663818, "epoch": 9.9, "learning_rate": 5.447543909082371e-07, "loss": 0.4838, "step": 11714, "task_loss": 0.8942331671714783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5862898826599121, "epoch": 9.9, "learning_rate": 5.400582323659247e-07, "loss": 0.5182, "step": 11715, "task_loss": 0.8853765726089478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4126562476158142, "epoch": 9.9, "learning_rate": 5.353620738236123e-07, "loss": 0.4236, "step": 11716, "task_loss": 0.7598079442977905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.42852726578712463, "epoch": 9.9, "learning_rate": 5.306659152812999e-07, "loss": 0.6856, "step": 11717, "task_loss": 0.6678671836853027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6689752340316772, "epoch": 9.9, "learning_rate": 5.259697567389875e-07, "loss": 0.4455, "step": 11718, "task_loss": 1.208269715309143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5409055948257446, "epoch": 9.91, "learning_rate": 5.212735981966752e-07, "loss": 0.538, "step": 11719, "task_loss": 0.5290137529373169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7665896415710449, "epoch": 9.91, "learning_rate": 5.165774396543628e-07, "loss": 0.5291, "step": 11720, "task_loss": 1.6194367408752441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44553297758102417, "epoch": 9.91, "learning_rate": 5.118812811120504e-07, "loss": 0.4596, "step": 11721, "task_loss": 0.8014077544212341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5479693412780762, "epoch": 9.91, "learning_rate": 5.07185122569738e-07, "loss": 0.536, "step": 11722, "task_loss": 1.3178647756576538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3408850431442261, "epoch": 9.91, "learning_rate": 5.024889640274256e-07, "loss": 0.5776, "step": 11723, "task_loss": 0.24005983769893646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4757954478263855, "epoch": 9.91, "learning_rate": 4.977928054851132e-07, "loss": 0.5681, "step": 11724, "task_loss": 0.796813428401947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6473527550697327, "epoch": 9.91, "learning_rate": 4.930966469428008e-07, "loss": 0.4796, "step": 11725, "task_loss": 0.36352282762527466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3827659487724304, "epoch": 9.91, "learning_rate": 4.884004884004884e-07, "loss": 0.434, "step": 11726, "task_loss": 0.22757142782211304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6554257869720459, "epoch": 9.91, "learning_rate": 4.83704329858176e-07, "loss": 0.5168, "step": 11727, "task_loss": 0.8382934331893921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4603111147880554, "epoch": 9.91, "learning_rate": 4.790081713158637e-07, "loss": 0.4577, "step": 11728, "task_loss": 1.4271087646484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6830288171768188, "epoch": 9.91, "learning_rate": 4.7431201277355126e-07, "loss": 0.5492, "step": 11729, "task_loss": 0.6862311959266663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.48706239461898804, "epoch": 9.91, "learning_rate": 4.6961585423123887e-07, "loss": 0.5667, "step": 11730, "task_loss": 1.1102629899978638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.514553427696228, "epoch": 9.92, "learning_rate": 4.649196956889265e-07, "loss": 0.8336, "step": 11731, "task_loss": 0.6839701533317566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 1.1065385341644287, "epoch": 9.92, "learning_rate": 4.602235371466141e-07, "loss": 0.5854, "step": 11732, "task_loss": 1.9425227642059326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7639367580413818, "epoch": 9.92, "learning_rate": 4.555273786043017e-07, "loss": 0.5967, "step": 11733, "task_loss": 0.8367618918418884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26383066177368164, "epoch": 9.92, "learning_rate": 4.508312200619893e-07, "loss": 0.5469, "step": 11734, "task_loss": 0.413463830947876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4512442648410797, "epoch": 9.92, "learning_rate": 4.4613506151967687e-07, "loss": 0.5087, "step": 11735, "task_loss": 0.39502155780792236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2825283706188202, "epoch": 9.92, "learning_rate": 4.414389029773645e-07, "loss": 0.5065, "step": 11736, "task_loss": 0.05941805988550186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5912244319915771, "epoch": 9.92, "learning_rate": 4.367427444350521e-07, "loss": 0.6501, "step": 11737, "task_loss": 0.5189902186393738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5389429330825806, "epoch": 9.92, "learning_rate": 4.320465858927397e-07, "loss": 0.5232, "step": 11738, "task_loss": 0.6558791399002075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3308948278427124, "epoch": 9.92, "learning_rate": 4.273504273504274e-07, "loss": 0.5061, "step": 11739, "task_loss": 0.5623687505722046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39079755544662476, "epoch": 9.92, "learning_rate": 4.22654268808115e-07, "loss": 0.5225, "step": 11740, "task_loss": 0.564607560634613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.37612399458885193, "epoch": 9.92, "learning_rate": 4.1795811026580264e-07, "loss": 0.3597, "step": 11741, "task_loss": 0.6106626987457275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4435874819755554, "epoch": 9.93, "learning_rate": 4.1326195172349025e-07, "loss": 0.4853, "step": 11742, "task_loss": 0.645026445388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41383856534957886, "epoch": 9.93, "learning_rate": 4.0856579318117786e-07, "loss": 0.5241, "step": 11743, "task_loss": 0.7443991303443909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.9015700221061707, "epoch": 9.93, "learning_rate": 4.0386963463886547e-07, "loss": 0.7162, "step": 11744, "task_loss": 0.32943806052207947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6842193603515625, "epoch": 9.93, "learning_rate": 3.991734760965531e-07, "loss": 0.6099, "step": 11745, "task_loss": 1.2599283456802368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3419632613658905, "epoch": 9.93, "learning_rate": 3.944773175542407e-07, "loss": 0.4872, "step": 11746, "task_loss": 1.2848331928253174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6058498024940491, "epoch": 9.93, "learning_rate": 3.897811590119283e-07, "loss": 0.6112, "step": 11747, "task_loss": 0.7746321558952332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.257434219121933, "epoch": 9.93, "learning_rate": 3.850850004696159e-07, "loss": 0.6017, "step": 11748, "task_loss": 0.9243019223213196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6796307563781738, "epoch": 9.93, "learning_rate": 3.803888419273035e-07, "loss": 0.438, "step": 11749, "task_loss": 1.5566089153289795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.512998104095459, "epoch": 9.93, "learning_rate": 3.756926833849911e-07, "loss": 0.4867, "step": 11750, "task_loss": 1.144585132598877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.377230703830719, "epoch": 9.93, "learning_rate": 3.709965248426787e-07, "loss": 0.4502, "step": 11751, "task_loss": 1.4658944606781006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3343382775783539, "epoch": 9.93, "learning_rate": 3.663003663003663e-07, "loss": 0.5779, "step": 11752, "task_loss": 0.19127511978149414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6784514784812927, "epoch": 9.93, "learning_rate": 3.616042077580539e-07, "loss": 0.5066, "step": 11753, "task_loss": 0.6241219639778137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3716127276420593, "epoch": 9.94, "learning_rate": 3.569080492157415e-07, "loss": 0.4452, "step": 11754, "task_loss": 0.19366218149662018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.32537204027175903, "epoch": 9.94, "learning_rate": 3.5221189067342913e-07, "loss": 0.4001, "step": 11755, "task_loss": 0.17664626240730286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41187089681625366, "epoch": 9.94, "learning_rate": 3.4751573213111674e-07, "loss": 0.5203, "step": 11756, "task_loss": 0.28928688168525696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6671342849731445, "epoch": 9.94, "learning_rate": 3.4281957358880435e-07, "loss": 0.5187, "step": 11757, "task_loss": 0.29460760951042175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4862149655818939, "epoch": 9.94, "learning_rate": 3.3812341504649196e-07, "loss": 0.4989, "step": 11758, "task_loss": 0.3233022391796112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49517154693603516, "epoch": 9.94, "learning_rate": 3.334272565041796e-07, "loss": 0.4845, "step": 11759, "task_loss": 0.5157822966575623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5504322648048401, "epoch": 9.94, "learning_rate": 3.2873109796186723e-07, "loss": 0.5188, "step": 11760, "task_loss": 0.6025530099868774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5230059027671814, "epoch": 9.94, "learning_rate": 3.2403493941955484e-07, "loss": 0.5011, "step": 11761, "task_loss": 0.318107545375824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5423469543457031, "epoch": 9.94, "learning_rate": 3.1933878087724245e-07, "loss": 0.5369, "step": 11762, "task_loss": 0.336749404668808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4111363887786865, "epoch": 9.94, "learning_rate": 3.1464262233493006e-07, "loss": 0.5257, "step": 11763, "task_loss": 0.6711960434913635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4206879436969757, "epoch": 9.94, "learning_rate": 3.0994646379261767e-07, "loss": 0.6478, "step": 11764, "task_loss": 0.2812909185886383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.40432295203208923, "epoch": 9.94, "learning_rate": 3.052503052503053e-07, "loss": 0.563, "step": 11765, "task_loss": 0.331087201833725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4093506932258606, "epoch": 9.95, "learning_rate": 3.005541467079929e-07, "loss": 0.4459, "step": 11766, "task_loss": 0.2977748215198517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3988795876502991, "epoch": 9.95, "learning_rate": 2.958579881656805e-07, "loss": 0.4129, "step": 11767, "task_loss": 0.1829969435930252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46886324882507324, "epoch": 9.95, "learning_rate": 2.911618296233681e-07, "loss": 0.5842, "step": 11768, "task_loss": 0.5065206289291382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.23979291319847107, "epoch": 9.95, "learning_rate": 2.864656710810557e-07, "loss": 0.4156, "step": 11769, "task_loss": 0.3774416446685791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46815216541290283, "epoch": 9.95, "learning_rate": 2.817695125387433e-07, "loss": 0.4716, "step": 11770, "task_loss": 0.3729710578918457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7470104694366455, "epoch": 9.95, "learning_rate": 2.770733539964309e-07, "loss": 0.4957, "step": 11771, "task_loss": 0.9940965175628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.31976956129074097, "epoch": 9.95, "learning_rate": 2.7237719545411855e-07, "loss": 0.4844, "step": 11772, "task_loss": 1.2364912033081055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.45727962255477905, "epoch": 9.95, "learning_rate": 2.6768103691180616e-07, "loss": 0.4838, "step": 11773, "task_loss": 0.08624500781297684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6594494581222534, "epoch": 9.95, "learning_rate": 2.629848783694938e-07, "loss": 0.5001, "step": 11774, "task_loss": 1.5781601667404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4018799960613251, "epoch": 9.95, "learning_rate": 2.582887198271814e-07, "loss": 0.5889, "step": 11775, "task_loss": 1.363936424255371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2549334466457367, "epoch": 9.95, "learning_rate": 2.53592561284869e-07, "loss": 0.507, "step": 11776, "task_loss": 0.7800723910331726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4833108186721802, "epoch": 9.95, "learning_rate": 2.488964027425566e-07, "loss": 0.5853, "step": 11777, "task_loss": 0.8033910989761353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5145341157913208, "epoch": 9.96, "learning_rate": 2.442002442002442e-07, "loss": 0.5721, "step": 11778, "task_loss": 0.7059109210968018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.275804340839386, "epoch": 9.96, "learning_rate": 2.395040856579318e-07, "loss": 0.4794, "step": 11779, "task_loss": 0.6918278932571411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4819774627685547, "epoch": 9.96, "learning_rate": 2.3480792711561944e-07, "loss": 0.577, "step": 11780, "task_loss": 0.740038275718689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4384079873561859, "epoch": 9.96, "learning_rate": 2.3011176857330705e-07, "loss": 0.3908, "step": 11781, "task_loss": 0.5908992290496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35022008419036865, "epoch": 9.96, "learning_rate": 2.2541561003099466e-07, "loss": 0.4693, "step": 11782, "task_loss": 0.35684171319007874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49198630452156067, "epoch": 9.96, "learning_rate": 2.2071945148868224e-07, "loss": 0.5018, "step": 11783, "task_loss": 0.7499622106552124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3677085041999817, "epoch": 9.96, "learning_rate": 2.1602329294636985e-07, "loss": 0.4458, "step": 11784, "task_loss": 0.3453422486782074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5374306440353394, "epoch": 9.96, "learning_rate": 2.113271344040575e-07, "loss": 0.7184, "step": 11785, "task_loss": 0.547870397567749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4218498170375824, "epoch": 9.96, "learning_rate": 2.0663097586174512e-07, "loss": 0.4156, "step": 11786, "task_loss": 0.17974869906902313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5072303414344788, "epoch": 9.96, "learning_rate": 2.0193481731943273e-07, "loss": 0.5319, "step": 11787, "task_loss": 0.17342707514762878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2168872207403183, "epoch": 9.96, "learning_rate": 1.9723865877712034e-07, "loss": 0.3625, "step": 11788, "task_loss": 0.6192384958267212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4072166979312897, "epoch": 9.96, "learning_rate": 1.9254250023480795e-07, "loss": 0.4139, "step": 11789, "task_loss": 0.2638684809207916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.33316653966903687, "epoch": 9.97, "learning_rate": 1.8784634169249554e-07, "loss": 0.4851, "step": 11790, "task_loss": 0.16370342671871185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3198395073413849, "epoch": 9.97, "learning_rate": 1.8315018315018315e-07, "loss": 0.3528, "step": 11791, "task_loss": 0.5204713940620422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3688572645187378, "epoch": 9.97, "learning_rate": 1.7845402460787076e-07, "loss": 0.4153, "step": 11792, "task_loss": 0.10315749049186707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5872730016708374, "epoch": 9.97, "learning_rate": 1.7375786606555837e-07, "loss": 0.4895, "step": 11793, "task_loss": 0.3097195029258728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3699455261230469, "epoch": 9.97, "learning_rate": 1.6906170752324598e-07, "loss": 0.4904, "step": 11794, "task_loss": 0.5205309987068176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6963080167770386, "epoch": 9.97, "learning_rate": 1.6436554898093362e-07, "loss": 0.6842, "step": 11795, "task_loss": 1.0190958976745605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.532802164554596, "epoch": 9.97, "learning_rate": 1.5966939043862123e-07, "loss": 0.4609, "step": 11796, "task_loss": 0.506659984588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.26130205392837524, "epoch": 9.97, "learning_rate": 1.5497323189630884e-07, "loss": 0.4141, "step": 11797, "task_loss": 0.6925204992294312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4145433008670807, "epoch": 9.97, "learning_rate": 1.5027707335399645e-07, "loss": 0.49, "step": 11798, "task_loss": 0.7069524526596069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.46688950061798096, "epoch": 9.97, "learning_rate": 1.4558091481168406e-07, "loss": 0.5254, "step": 11799, "task_loss": 1.1364697217941284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4567200541496277, "epoch": 9.97, "learning_rate": 1.4088475626937164e-07, "loss": 0.4961, "step": 11800, "task_loss": 0.16338635981082916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5073204636573792, "epoch": 9.97, "learning_rate": 1.3618859772705928e-07, "loss": 0.4385, "step": 11801, "task_loss": 0.40598607063293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35134240984916687, "epoch": 9.98, "learning_rate": 1.314924391847469e-07, "loss": 0.3796, "step": 11802, "task_loss": 0.5390300750732422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3988267779350281, "epoch": 9.98, "learning_rate": 1.267962806424345e-07, "loss": 0.4723, "step": 11803, "task_loss": 0.8475526571273804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.39277511835098267, "epoch": 9.98, "learning_rate": 1.221001221001221e-07, "loss": 0.3714, "step": 11804, "task_loss": 0.3133774697780609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.457173228263855, "epoch": 9.98, "learning_rate": 1.1740396355780972e-07, "loss": 0.5216, "step": 11805, "task_loss": 0.4007125496864319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7074745297431946, "epoch": 9.98, "learning_rate": 1.1270780501549733e-07, "loss": 0.4724, "step": 11806, "task_loss": 0.4317512512207031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.545612633228302, "epoch": 9.98, "learning_rate": 1.0801164647318492e-07, "loss": 0.5148, "step": 11807, "task_loss": 0.6539083123207092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.8027074933052063, "epoch": 9.98, "learning_rate": 1.0331548793087256e-07, "loss": 0.4957, "step": 11808, "task_loss": 0.508540153503418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5743611454963684, "epoch": 9.98, "learning_rate": 9.861932938856017e-08, "loss": 0.496, "step": 11809, "task_loss": 0.4208216071128845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7835821509361267, "epoch": 9.98, "learning_rate": 9.392317084624777e-08, "loss": 0.6824, "step": 11810, "task_loss": 1.0997731685638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6281958818435669, "epoch": 9.98, "learning_rate": 8.922701230393538e-08, "loss": 0.5581, "step": 11811, "task_loss": 1.279786467552185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.44678524136543274, "epoch": 9.98, "learning_rate": 8.453085376162299e-08, "loss": 0.5305, "step": 11812, "task_loss": 0.20385698974132538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.21224243938922882, "epoch": 9.99, "learning_rate": 7.983469521931061e-08, "loss": 0.4486, "step": 11813, "task_loss": 0.4176413118839264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3762721121311188, "epoch": 9.99, "learning_rate": 7.513853667699822e-08, "loss": 0.5199, "step": 11814, "task_loss": 0.4122314453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.43662840127944946, "epoch": 9.99, "learning_rate": 7.044237813468582e-08, "loss": 0.486, "step": 11815, "task_loss": 0.3087463974952698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5046578645706177, "epoch": 9.99, "learning_rate": 6.574621959237344e-08, "loss": 0.5625, "step": 11816, "task_loss": 1.6193766593933105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.5270827412605286, "epoch": 9.99, "learning_rate": 6.105006105006105e-08, "loss": 0.4392, "step": 11817, "task_loss": 0.9583194255828857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.3815213143825531, "epoch": 9.99, "learning_rate": 5.6353902507748664e-08, "loss": 0.4983, "step": 11818, "task_loss": 0.5789121389389038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4767656922340393, "epoch": 9.99, "learning_rate": 5.165774396543628e-08, "loss": 0.396, "step": 11819, "task_loss": 0.051820576190948486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6256740093231201, "epoch": 9.99, "learning_rate": 4.6961585423123884e-08, "loss": 0.5378, "step": 11820, "task_loss": 1.4378706216812134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.49686819314956665, "epoch": 9.99, "learning_rate": 4.2265426880811495e-08, "loss": 0.4836, "step": 11821, "task_loss": 0.5417465567588806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.424044132232666, "epoch": 9.99, "learning_rate": 3.756926833849911e-08, "loss": 0.4048, "step": 11822, "task_loss": 0.6124973297119141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.2528426945209503, "epoch": 9.99, "learning_rate": 3.287310979618672e-08, "loss": 0.484, "step": 11823, "task_loss": 0.12173671275377274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.575293779373169, "epoch": 9.99, "learning_rate": 2.8176951253874332e-08, "loss": 0.5552, "step": 11824, "task_loss": 1.1922359466552734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4893873631954193, "epoch": 10.0, "learning_rate": 2.3480792711561942e-08, "loss": 0.5191, "step": 11825, "task_loss": 0.11081700772047043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.6046187877655029, "epoch": 10.0, "learning_rate": 1.8784634169249556e-08, "loss": 0.5291, "step": 11826, "task_loss": 0.4708767533302307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.41098475456237793, "epoch": 10.0, "learning_rate": 1.4088475626937166e-08, "loss": 0.6132, "step": 11827, "task_loss": 1.2466886043548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.7624210119247437, "epoch": 10.0, "learning_rate": 9.392317084624778e-09, "loss": 0.4839, "step": 11828, "task_loss": 1.1490118503570557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.35955220460891724, "epoch": 10.0, "learning_rate": 4.696158542312389e-09, "loss": 0.4475, "step": 11829, "task_loss": 0.6983180046081543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4082770846497614, "compression/movement_sparsity/model_sparsity": 0.39425151078481935, "compression_loss": 0.0, "distillation_loss": 0.4585152268409729, "epoch": 10.0, "learning_rate": 0.0, "loss": 0.5293, "step": 11830, "task_loss": 1.2064448595046997 }, { "epoch": 10.0, "step": 11830, "total_flos": 5.9664632082415714e+19, "train_loss": 25.693809306425052, "train_runtime": 41910.9513, "train_samples_per_second": 18.074, "train_steps_per_second": 0.282 } ], "max_steps": 11830, "num_train_epochs": 10, "total_flos": 5.9664632082415714e+19, "trial_name": null, "trial_params": null }